linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/6] rbtree: rb_erase updates and comments
  2012-07-20 12:31 [RFC PATCH 0/6] augmented rbtree changes Michel Lespinasse
@ 2012-07-20 12:31 ` Michel Lespinasse
  2012-07-24 18:50   ` Rik van Riel
  0 siblings, 1 reply; 4+ messages in thread
From: Michel Lespinasse @ 2012-07-20 12:31 UTC (permalink / raw)
  To: riel, peterz, daniel.santos, aarcange, dwmw2, akpm; +Cc: linux-mm, linux-kernel

Minor updates to the rb_erase() function:
- Reorder code to put simplest / common case (no more than 1 child) first.
- Fetch the parent first, since it ends up being required in all 3 cases.
- Add a few comments to illustrate case 2 (node to remove has 2 childs,
  but one of them is the successor) and case 3 (node to remove has 2 childs,
  successor is a left-descendant of the right child).

Signed-off-by: Michel Lespinasse <walken@google.com>
---
 lib/rbtree.c |  115 ++++++++++++++++++++++++++++++++++++----------------------
 1 files changed, 72 insertions(+), 43 deletions(-)

diff --git a/lib/rbtree.c b/lib/rbtree.c
index 0892670..3b6ec98 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -2,7 +2,8 @@
   Red Black Trees
   (C) 1999  Andrea Arcangeli <andrea@suse.de>
   (C) 2002  David Woodhouse <dwmw2@infradead.org>
-  
+  (C) 2012  Michel Lespinasse <walken@google.com>
+
   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
@@ -356,65 +357,93 @@ static void __rb_erase_color(struct rb_node *node, struct rb_node *parent,
 
 void rb_erase(struct rb_node *node, struct rb_root *root)
 {
-	struct rb_node *child, *parent;
-	int color;
+	struct rb_node *parent = rb_parent(node);
+	struct rb_node *child = node->rb_right;
+	struct rb_node *tmp = node->rb_left;
+	bool black;
+
+	if (!tmp) {
+		/* Case 1: node to erase has no more than 1 child (easy!) */
+		if (child)
+one_child:
+			rb_set_parent(child, parent);
+		if (parent) {
+			if (parent->rb_left == node)
+				parent->rb_left = child;
+			else
+				parent->rb_right = child;
+		} else
+			root->rb_node = child;
 
-	if (!node->rb_left)
-		child = node->rb_right;
-	else if (!node->rb_right)
-		child = node->rb_left;
-	else {
-		struct rb_node *old = node, *left;
+		black = rb_is_black(node);
+	} else if (!child) {
+		/* Still case 1, but this time the child is node->rb_left */
+		child = tmp;
+		goto one_child;
+	} else {
+		struct rb_node *old = node;
 
+		/*
+		 * Old is the node we want to erase. It's got left and right
+		 * children, which makes things difficult. Let's find the
+		 * next node in the tree to have it fill old's position.
+		 */
 		node = node->rb_right;
-		while ((left = node->rb_left) != NULL)
-			node = left;
+		while ((tmp = node->rb_left) != NULL)
+			node = tmp;
 
-		if (rb_parent(old)) {
-			if (rb_parent(old)->rb_left == old)
-				rb_parent(old)->rb_left = node;
+		/* Graft node (old's successor) under old's parent. */
+		if (parent) {
+			if (parent->rb_left == old)
+				parent->rb_left = node;
 			else
-				rb_parent(old)->rb_right = node;
+				parent->rb_right = node;
 		} else
 			root->rb_node = node;
 
-		child = node->rb_right;
 		parent = rb_parent(node);
-		color = rb_color(node);
+		black = rb_is_black(node);
+		node->__rb_parent_color = old->__rb_parent_color;
+
+		/*
+		 * Node doesn't have a left child, since it is old's successor,
+		 * so we can take old's left child and graft it under node.
+		 */
+		node->rb_left = tmp = old->rb_left;
+		rb_set_parent(tmp, node);
 
+		child = node->rb_right;
 		if (parent == old) {
+			/*
+			 * Case 2: old is node's parent (we are done!)
+			 *
+			 *    (o)          (n)
+			 *    / \          / \
+			 *  (x) (n)  ->  (x) (c)
+			 *        \
+			 *        (c)
+			 */
 			parent = node;
 		} else {
+			/*
+			 * Case 3: old is node's ancestor but not its parent
+			 *
+			 *    (o)          (n)
+			 *    / \          / \
+			 *  (x) (y)  ->  (x) (y)
+			 *      /            /
+			 *    (n)          (c)
+			 *      \
+			 *      (c)
+			 */
+			node->rb_right = tmp = old->rb_right;
+			parent->rb_left = child;
+			rb_set_parent(tmp, node);
 			if (child)
 				rb_set_parent(child, parent);
-			parent->rb_left = child;
-
-			node->rb_right = old->rb_right;
-			rb_set_parent(old->rb_right, node);
 		}
-
-		node->__rb_parent_color = old->__rb_parent_color;
-		node->rb_left = old->rb_left;
-		rb_set_parent(old->rb_left, node);
-
-		goto color;
 	}
-
-	parent = rb_parent(node);
-	color = rb_color(node);
-
-	if (child)
-		rb_set_parent(child, parent);
-	if (parent) {
-		if (parent->rb_left == node)
-			parent->rb_left = child;
-		else
-			parent->rb_right = child;
-	} else
-		root->rb_node = child;
-
-color:
-	if (color == RB_BLACK)
+	if (black)
 		__rb_erase_color(child, parent, root);
 }
 EXPORT_SYMBOL(rb_erase);
-- 
1.7.7.3

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH 1/6] rbtree: rb_erase updates and comments
  2012-07-20 12:31 ` [PATCH 1/6] rbtree: rb_erase updates and comments Michel Lespinasse
@ 2012-07-24 18:50   ` Rik van Riel
  0 siblings, 0 replies; 4+ messages in thread
From: Rik van Riel @ 2012-07-24 18:50 UTC (permalink / raw)
  To: Michel Lespinasse
  Cc: peterz, daniel.santos, aarcange, dwmw2, akpm, linux-mm,
	linux-kernel

On 07/20/2012 08:31 AM, Michel Lespinasse wrote:
> Minor updates to the rb_erase() function:
> - Reorder code to put simplest / common case (no more than 1 child) first.
> - Fetch the parent first, since it ends up being required in all 3 cases.
> - Add a few comments to illustrate case 2 (node to remove has 2 childs,
>    but one of them is the successor) and case 3 (node to remove has 2 childs,
>    successor is a left-descendant of the right child).
>
> Signed-off-by: Michel Lespinasse<walken@google.com>

Acked-by: Rik van Riel <riel@redhat.com>

-- 
All rights reversed

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH 1/6] rbtree: rb_erase updates and comments
       [not found] <20120729040432.25753.qmail@science.horizon.com>
@ 2012-07-29 12:36 ` Michel Lespinasse
  2012-07-29 20:27   ` George Spelvin
  0 siblings, 1 reply; 4+ messages in thread
From: Michel Lespinasse @ 2012-07-29 12:36 UTC (permalink / raw)
  To: George Spelvin; +Cc: LKML, linux-mm

On Sat, Jul 28, 2012 at 9:04 PM, George Spelvin <linux@horizon.com> wrote:
> I was just looking at the beginning of the 2-children case and wondering:
>
> +               /*
> +                * Old is the node we want to erase. It's got left and right
> +                * children, which makes things difficult. Let's find the
> +                * next node in the tree to have it fill old's position.
> +                */
>                 node = node->rb_right;
>
> Er... isn't this already available in "child"?  Why fetch it again (or make the
> compiler figure out that it doesn't have to)?

Good catch, I just failed to notice it myself :)

> Another thing you can use for comments is that if a node has a single
> child, that child must be red.  That can simplify the rb_set_parent
> code, since it does not need to read the old value.

Nicely observed, I hadn't thought of that one either.

> Then the end of case 3 of rb_erase becomes:
>         if (child)
>                 rb_set_parent_color(child, parent, RB_RED);

Yes. it's actually even nicer, because we know since the child is red,
the node being erased is black, and we can thus handle recoloring 'for
free' by setting child to black here instead of going through
__rb_erase_color() later. And if we could do that for all 1-child
cases, it might even be possible to invoke __rb_erase_color() for the
no-childs case only, at which point we can drop one of that function's
arguments. Worth investigating, I think.

> A common idiom I see in the code:
>
> +               if (parent) {
> +                       if (parent->rb_left == node)
> +                               parent->rb_left = child;
> +                       else
> +                               parent->rb_right = child;
> +               } else
> +                       root->rb_node = child;
>
> might be written more attractively as:
>
>                 if (unlikely(!parent))
>                         root->rb_node = child;
>                 else if (parent->rb_left == node)
>                         parent->rb_left = child;
>                 else
>                         parent->rb_right = child;
>
> I'm almost tempted to wrap that up in a helper function, although the
> lack of an obviously correct order for the three "struct rb_node *" parameters
> suggests that it would maybe be too confusing.

Using a helper doesn't hurt, I think. I'm not sure about the unlikely
thing because near-empty trees could be common for some workloads,
which is why I've let it the way it currently is.

> Finally, it would be interesting to look at Sedgewick's left-leaning RB-tree to see
> if it could improve things.  I'm not sure if it would, since in a multiprocessor system,
> the most important thing is minimizing cache line bouncing, which means minimizing
> writes, and he gets his code simplicity by tightening invariants, which means more
> write traffic.

Yeah, I've had a quick look at left-leaning RBtrees, but they didn't
seem like an obvious win to me. Plus, I feel like I've been thinking
about rbtrees too much already, so I kinda want to take a vacation
from them at this point :)

Thanks for your remarks, especially the one about one-child coloring
in rb_erase().

-- 
Michel "Walken" Lespinasse
A program is never fully debugged until the last user dies.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH 1/6] rbtree: rb_erase updates and comments
  2012-07-29 12:36 ` [PATCH 1/6] rbtree: rb_erase updates and comments Michel Lespinasse
@ 2012-07-29 20:27   ` George Spelvin
  0 siblings, 0 replies; 4+ messages in thread
From: George Spelvin @ 2012-07-29 20:27 UTC (permalink / raw)
  To: linux, walken; +Cc: linux-kernel, linux-mm

>> Then the end of case 3 of rb_erase becomes:
>>         if (child)
>>                 rb_set_parent_color(child, parent, RB_RED);

> Yes. it's actually even nicer, because we know since the child is red,
> the node being erased is black, and we can thus handle recoloring 'for
> free' by setting child to black here instead of going through
> __rb_erase_color() later. And if we could do that for all 1-child
> cases, it might even be possible to invoke __rb_erase_color() for the
> no-childs case only, at which point we can drop one of that function's
> arguments. Worth investigating, I think.

I haven't wrapped my head around the recoloring functions yet.  I stopped
when I realized that if the child is red, the parent must be black,
but the child might be NULL, in which case the parent could be any color.

But... oh!  I see!  It's not the node being erased, but the node being
grafted.  If it has a red child, then it's part of a virtual 3-node, and
that can be downgraded to a 2-node without requiring any tree rebalancing!

Here's an (untested!) attempt at writing the code.  Be aware that although
I use the same variable names, there are significnt differences in what
they're used for!  In particular, "node" is never chnaged, so there's
no need for "old", and the successor is "child".

The main simplifcation I did was rename "black" to "rebalance", initially
set false, and only set "rebalance = rb_is_black(node)" when necessary.

Given the new variable uses, you could merge the "rebalance" and "parent"
variables, so rebalance = false is implied by parent = NULL.  I'm not
sure the variable saving is worth the code complexity, though.

void rb_erase(struct rb_node * const node, struct rb_root * const root)
{
	struct rb_node *parent;
	struct rb_node *child = node->rb_right;
	struct rb_node *tmp = node->rb_left;
	bool rebalance = false;

	if (!tmp) {
		/* Case 1: node to erase has no more than 1 child (easy!) */
		if (!child) {
			/* Node might be black */
			rebalance = rb_is_black(node);
		} else {
one_child:		/*
			 * Node has a child: it must be black, and the child
			 * must be red.  So promote the child to black, and
			 * no rebalancing needed.
			 */
			child->__rb_parent_color = node->__rb_parent_color;
		}
		parent = tmp = rb_parent(node);	// Hope compiler can CSE
	} else if (!child) {
		/* Still case 1, but this time the child is node->rb_left */
		child = tmp;
		goto one_child;
	} else {
		/*
		 * The node we want to erase has both left and right
		 * children, which makes things difficult.  Let's find the
		 * next node in the tree to have it fill old's position.
		 */
		tmp = child->rb_left;
		if (!tmp) {
			/*
			 * Case 2: Successor is node's right child.
			 *
			 *    (n)          (c)
			 *    / \          / \
			 *  (x) (c)  ->  (x) (z)
			 *        \
			 *        (z)
			 *
			 * Node z may or may not exist.  If it exists, it must
			 * be red, and can be promoted to black with no other
			 * tree rebalancing.
			 */
			tmp = child->rb_right;
			parent = child;
		} else {
			/*
			 * Case 3: Successor is not direct child; it
			 * is the left child of a chain (y) of other
			 * nodes.
			 *
			 *    (n)          (c)
			 *    / \          / \
			 *  (x) (y)  ->  (x) (y)
			 *      /            /
			 *    (c)          (z)
			 *      \
			 *      (z)
			 *
			 * As above, z may or may not exist.
			 */
			do {
				parent = child;
				child = tmp;
				tmp = child->rb_left;
			} while (tmp);

			child->rb_right = tmp = node->rb_right;
			rb_set_parent(tmp, child);
			parent->rb_left = tmp = child->rb_right;
		}
		// Variable usage:
		// "parent" is the bottom of "(y)", which is the same as
		//	"child" in case 2.
		// "tmp" is "(z)"
		// child->rb_right has been set correctly, as has
		// the pointer to (z).
		/*
		 * Here there are two cases, just like in case 1.
		 * If child had a right child, then child was black
		 * and its child was red, and can be promoted with no
		 * rebalancing.  If it did not, child might be either
		 * color, and its parent rebalancing if it was black.
		 */
		if (tmp)
			// tmp->__rb_parent_color = child->__rb_parent_color;
			rb_set_parent_color(tmp, parent, RB_BLACK);
		else
			rebalance = rb_is_black(child);

		child->rb_left = node->rb_left;
		child->__rb_parent_color = node->__rb_parent_color;
		tmp = rb_parent(node);	// Hope compiler can CSE
	}
	if (!tmp)
		root->rb_node = child;
	else if (tmp->rb_left == node)
		tmp->rb_left = child;
	else
		tmp->rb_right = child;
	if (rebalance)
		__rb_erase_color(NULL, parent, root);
}

> Using a helper doesn't hurt, I think. I'm not sure about the unlikely
> thing because near-empty trees could be common for some workloads,
> which is why I've let it the way it currently is.

I wasn't positive, either, but I figured it was easy enough to delete if
you disagreed.

I did think of one possible simplification: return a pointer to the parent's
child pointer and update that:

	*rb_pptr(old, parent, root) = new;

where

static inline struct rb_node **rb_pptr(struct rb_node *rb, struct rb_node *p,
		struct rb_root *root)
{
	if (!p)
		return &root->rb_node;
	else if (parent->rb_left == node)
		return &parent->rb_left;
	else
		return &parent->rb_right;
}

> Yeah, I've had a quick look at left-leaning RBtrees, but they didn't
> seem like an obvious win to me. Plus, I feel like I've been thinking
> about rbtrees too much already, so I kinda want to take a vacation
> from them at this point :)

Ah.  Then I'll stop muttering about ideas that imply major rewrites.

> Thanks for your remarks, especially the one about one-child coloring
> in rb_erase().

You're the one who spotted the major simplification, but I'm glad
to have been of some minor help.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2012-07-29 20:27 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <20120729040432.25753.qmail@science.horizon.com>
2012-07-29 12:36 ` [PATCH 1/6] rbtree: rb_erase updates and comments Michel Lespinasse
2012-07-29 20:27   ` George Spelvin
2012-07-20 12:31 [RFC PATCH 0/6] augmented rbtree changes Michel Lespinasse
2012-07-20 12:31 ` [PATCH 1/6] rbtree: rb_erase updates and comments Michel Lespinasse
2012-07-24 18:50   ` Rik van Riel

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).