All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: linux-mm@kvack.org
Cc: npiggin@suse.de, akpm@linux-foundation.org, clameter@sgi.com,
	a.p.zijlstra@chello.nl
Subject: [PATCH 5/6] radix-tree: optimistic locking
Date: Wed, 18 Apr 2007 22:12:53 +0200	[thread overview]
Message-ID: <20070418201606.090997479@chello.nl> (raw)
In-Reply-To: 20070418201248.468050288@chello.nl

[-- Attachment #1: radix-tree-optimistic.patch --]
[-- Type: text/plain, Size: 11963 bytes --]

Implement optimistic locking for the concurrent radix tree.

Optimistic locking is aimed at avoiding taking higher level node locks to
reduce cache-line bouncing (and lock contention). We descent the tree using an
RCU lookup, looking for the lowest modification termination point.

If found, we try to acquire the lock of that node. After we have obtained this
lock, we will need to validate if the initial conditions still hold true.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 include/linux/radix-tree.h |   27 +++++-
 init/Kconfig               |    6 +
 lib/radix-tree.c           |  175 ++++++++++++++++++++++++++++++++++++++++-----
 3 files changed, 187 insertions(+), 21 deletions(-)

Index: linux-2.6/lib/radix-tree.c
===================================================================
--- linux-2.6.orig/lib/radix-tree.c	2007-04-18 09:56:20.000000000 +0200
+++ linux-2.6/lib/radix-tree.c	2007-04-18 09:56:42.000000000 +0200
@@ -362,6 +362,117 @@ static inline void radix_path_unlock(str
 #define radix_path_unlock(context, punlock) do { } while (0)
 #endif
 
+#ifdef CONFIG_RADIX_TREE_OPTIMISTIC
+typedef int (*radix_valid_fn)(struct radix_tree_node *, int, int);
+
+static struct radix_tree_node *
+radix_optimistic_lookup(struct radix_tree_context *context, unsigned long index,
+		int tag, radix_valid_fn valid)
+{
+	unsigned int height, shift;
+	struct radix_tree_node *node, *ret = NULL, **slot;
+	struct radix_tree_root *root = context->root;
+
+	node = rcu_dereference(root->rnode);
+	if (node == NULL)
+		return NULL;
+
+	if (!radix_tree_is_indirect_ptr(node))
+			return NULL;
+
+	node = radix_tree_indirect_to_ptr(node);
+
+	height = node->height;
+	if (index > radix_tree_maxindex(height))
+		return NULL;
+
+	shift = (height-1) * RADIX_TREE_MAP_SHIFT;
+	do {
+		int offset = (index >> shift) & RADIX_TREE_MAP_MASK;
+		if ((*valid)(node, offset, tag))
+			ret = node;
+		slot = (struct radix_tree_node **)(node->slots + offset);
+		node = rcu_dereference(*slot);
+		if (!node)
+			break;
+
+		shift -= RADIX_TREE_MAP_SHIFT;
+		height--;
+	} while (height > 0);
+
+	return ret;
+}
+
+static struct radix_tree_node *
+__radix_optimistic_lock(struct radix_tree_context *context, unsigned long index,
+	       	int tag, radix_valid_fn valid)
+{
+	struct radix_tree_node *node;
+	spinlock_t *locked;
+	unsigned int shift, offset;
+
+	node = radix_optimistic_lookup(context, index, tag, valid);
+	if (!node)
+		goto out;
+
+	locked = radix_node_lock(context->root, node);
+	if (!locked)
+		goto out;
+
+#if 0
+	if (node != radix_optimistic_lookup(context, index, tag, valid))
+		goto out_unlock;
+#else
+	/* check if the node got freed */
+	if (!node->count)
+		goto out_unlock;
+
+	/* check if the node is still a valid termination point */
+	shift = (node->height - 1) * RADIX_TREE_MAP_SHIFT;
+	offset = (index >> shift) & RADIX_TREE_MAP_MASK;
+	if (!(*valid)(node, offset, tag))
+		goto out_unlock;
+#endif
+
+	context->locked = locked;
+	return node;
+
+out_unlock:
+	spin_unlock(locked);
+out:
+	return NULL;
+}
+
+static struct radix_tree_node *
+radix_optimistic_lock(struct radix_tree_context *context, unsigned long index,
+		int tag, radix_valid_fn valid)
+{
+	struct radix_tree_node *node = NULL;
+
+	if (context) {
+		node = __radix_optimistic_lock(context, index, tag, valid);
+		if (!node) {
+			BUG_ON(context->locked);
+			spin_lock(&context->root->lock);
+			context->locked = &context->root->lock;
+		}
+	}
+	return node;
+}
+
+static int radix_valid_always(struct radix_tree_node *node, int offset, int tag)
+{
+	return 1;
+}
+
+static int radix_valid_tag(struct radix_tree_node *node, int offset, int tag)
+{
+	return tag_get(node, tag, offset);
+}
+#else
+#define radix_optimistic_lock(context, index, tag, valid) NULL
+#endif
+
 /**
  *	radix_tree_insert    -    insert into a radix tree
  *	@root:		radix tree root
@@ -382,6 +493,13 @@ int radix_tree_insert(struct radix_tree_
 
 	BUG_ON(radix_tree_is_indirect_ptr(item));
 
+	node = radix_optimistic_lock(context, index, 0, radix_valid_always);
+	if (node) {
+		height = node->height;
+		shift = (height-1) * RADIX_TREE_MAP_SHIFT;
+		goto optimistic;
+	}
+
 	/* Make sure the tree is high enough.  */
 	if (index > radix_tree_maxindex(root->height)) {
 		error = radix_tree_extend(root, index);
@@ -390,7 +508,6 @@ int radix_tree_insert(struct radix_tree_
 	}
 
 	slot = radix_tree_indirect_to_ptr(root->rnode);
-
 	height = root->height;
 	shift = (height-1) * RADIX_TREE_MAP_SHIFT;
 
@@ -409,11 +526,11 @@ int radix_tree_insert(struct radix_tree_
 		}
 
 		/* Go a level down */
-		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
 		node = slot;
-
 		radix_ladder_lock(context, node);
 
+optimistic:
+		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
 		slot = node->slots[offset];
 		shift -= RADIX_TREE_MAP_SHIFT;
 		height--;
@@ -456,6 +573,10 @@ void **radix_tree_lookup_slot(struct rad
 	struct radix_tree_node *node, **slot;
 	RADIX_TREE_CONTEXT(context, root);
 
+	node = radix_optimistic_lock(context, index, 0, radix_valid_always);
+	if (node)
+		goto optimistic;
+
 	node = rcu_dereference(root->rnode);
 	if (node == NULL)
 		return NULL;
@@ -467,6 +588,7 @@ void **radix_tree_lookup_slot(struct rad
 	}
 	node = radix_tree_indirect_to_ptr(node);
 
+optimistic:
 	height = node->height;
 	if (index > radix_tree_maxindex(height))
 		return NULL;
@@ -559,6 +681,13 @@ void *radix_tree_tag_set(struct radix_tr
 	struct radix_tree_node *slot;
 	RADIX_TREE_CONTEXT(context, root);
 
+	slot = radix_optimistic_lock(context, index, tag, radix_valid_tag);
+	if (slot) {
+		height = slot->height;
+		shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
+		goto optimistic;
+	}
+
 	height = root->height;
 	BUG_ON(index > radix_tree_maxindex(height));
 
@@ -574,6 +703,7 @@ void *radix_tree_tag_set(struct radix_tr
 
 		radix_ladder_lock(context, slot);
 
+optimistic:
 		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
 		if (!tag_get(slot, tag, offset))
 			tag_set(slot, tag, offset);
@@ -590,13 +720,13 @@ EXPORT_SYMBOL(radix_tree_tag_set);
 /*
  * the change can never propagate upwards from here.
  */
-static inline int radix_tree_unlock_tag(struct radix_tree_root *root,
-		struct radix_tree_path *pathp, int tag)
+static
+int radix_valid_tag_clear(struct radix_tree_node *node, int offset, int tag)
 {
 	int this, other;
 
-	this = tag_get(pathp->node, tag, pathp->offset);
-	other = any_tag_set_but(pathp->node, tag, pathp->offset);
+	this = tag_get(node, tag, offset);
+	other = any_tag_set_but(node, tag, offset);
 
 	return !this || other;
 }
@@ -621,9 +751,22 @@ void *radix_tree_tag_clear(struct radix_
 	struct radix_tree_path path[RADIX_TREE_MAX_PATH], *pathp = path;
 	struct radix_tree_path *punlock = path, *piter;
 	struct radix_tree_node *slot = NULL;
-	unsigned int height, shift;
+	unsigned int height, shift, offset;
+
 	RADIX_TREE_CONTEXT(context, root);
 
+	slot = radix_optimistic_lock(context, index, tag,
+			radix_valid_tag_clear);
+	if (slot) {
+		height = slot->height;
+		shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
+		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
+		pathp->offset = offset;
+		pathp->node = slot;
+		radix_path_init(context, pathp);
+		goto optimistic;
+	}
+
 	pathp->node = NULL;
 	radix_path_init(context, pathp);
 
@@ -635,8 +778,6 @@ void *radix_tree_tag_clear(struct radix_
 	slot = radix_tree_indirect_to_ptr(root->rnode);
 
 	while (height > 0) {
-		int offset;
-
 		if (slot == NULL)
 			goto out;
 
@@ -646,11 +787,12 @@ void *radix_tree_tag_clear(struct radix_
 		pathp->node = slot;
 		radix_path_lock(context, pathp, slot);
 
-		if (radix_tree_unlock_tag(root, pathp, tag)) {
+		if (radix_valid_tag_clear(slot, offset, tag)) {
 			for (; punlock < pathp; punlock++)
 				radix_path_unlock(context, punlock);
 		}
 
+optimistic:
 		slot = slot->slots[offset];
 		shift -= RADIX_TREE_MAP_SHIFT;
 		height--;
@@ -1160,14 +1302,20 @@ static inline void radix_tree_shrink(str
 	}
 }
 
-static inline int radix_tree_unlock_all(struct radix_tree_root *root,
-		struct radix_tree_path *pathp)
+static
+int radix_valid_delete(struct radix_tree_node *node, int offset, int tag)
 {
-	int tag;
-	int unlock = 1;
+	/*
+	 * we need to check for > 2, because nodes with a single child
+	 * can still be deleted, see radix_tree_shrink().
+	 */
+	int unlock = (node->count > 2);
+
+	if (!unlock)
+		return unlock;
 
 	for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
-		if (!radix_tree_unlock_tag(root, pathp, tag)) {
+		if (!radix_valid_tag_clear(node, offset, tag)) {
 			unlock = 0;
 			break;
 		}
@@ -1195,6 +1343,17 @@ void *radix_tree_delete(struct radix_tre
 	int offset;
 	RADIX_TREE_CONTEXT(context, root);
 
+	slot = radix_optimistic_lock(context, index, 0, radix_valid_delete);
+	if (slot) {
+		height = slot->height;
+		shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
+		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
+		pathp->offset = offset;
+		pathp->node = slot;
+		radix_path_init(context, pathp);
+		goto optimistic;
+	}
+
 	pathp->node = NULL;
 	radix_path_init(context, pathp);
 
@@ -1222,11 +1381,12 @@ void *radix_tree_delete(struct radix_tre
 		pathp->node = slot;
 		radix_path_lock(context, pathp, slot);
 
-		if (slot->count > 2 && radix_tree_unlock_all(root, pathp)) {
+		if (radix_valid_delete(slot, offset, 0)) {
 			for (; punlock < pathp; punlock++)
 				radix_path_unlock(context, punlock);
 		}
 
+optimistic:
 		slot = slot->slots[offset];
 		shift -= RADIX_TREE_MAP_SHIFT;
 		height--;
Index: linux-2.6/init/Kconfig
===================================================================
--- linux-2.6.orig/init/Kconfig	2007-04-18 09:56:20.000000000 +0200
+++ linux-2.6/init/Kconfig	2007-04-18 09:56:27.000000000 +0200
@@ -344,8 +344,14 @@ config SYSCTL
 
 config RADIX_TREE_CONCURRENT
 	bool "Enable concurrent radix tree operations (EXPERIMENTAL)"
+	depends on EXPERIMENTAL
 	default y if SMP
 
+config RADIX_TREE_OPTIMISTIC
+	bool "Enabled optimistic locking (EXPERIMENTAL)"
+	depends on RADIX_TREE_CONCURRENT
+	default y
+
 menuconfig EMBEDDED
 	bool "Configure standard kernel features (for small systems)"
 	help
Index: linux-2.6/include/linux/radix-tree.h
===================================================================
--- linux-2.6.orig/include/linux/radix-tree.h	2007-04-18 09:56:20.000000000 +0200
+++ linux-2.6/include/linux/radix-tree.h	2007-04-18 09:56:27.000000000 +0200
@@ -197,28 +197,47 @@ static inline void radix_tree_replace_sl
 	rcu_assign_pointer(*pslot, item);
 }
 
+#if defined(CONFIG_RADIX_TREE_OPTIMISTIC)
+static inline void radix_tree_lock(struct radix_tree_context *context)
+{
+	rcu_read_lock();
+	BUG_ON(context->locked);
+}
+#elif defined(CONFIG_RADIX_TREE_CONCURRENT)
 static inline void radix_tree_lock(struct radix_tree_context *context)
 {
 	struct radix_tree_root *root = context->root;
+
 	rcu_read_lock();
 	spin_lock(&root->lock);
-#ifdef CONFIG_RADIX_TREE_CONCURRENT
 	BUG_ON(context->locked);
 	context->locked = &root->lock;
-#endif
 }
+#else
+static inline void radix_tree_lock(struct radix_tree_context *context)
+{
+	struct radix_tree_root *root = context->root;
+
+	rcu_read_lock();
+	spin_lock(&root->lock);
+}
+#endif
 
+#if defined(CONFIG_RADIX_TREE_CONCURRENT)
 static inline void radix_tree_unlock(struct radix_tree_context *context)
 {
-#ifdef CONFIG_RADIX_TREE_CONCURRENT
 	BUG_ON(!context->locked);
 	spin_unlock(context->locked);
 	context->locked = NULL;
+	rcu_read_unlock();
+}
 #else
+static inline void radix_tree_unlock(struct radix_tree_context *context)
+{
 	spin_unlock(&context->root->lock);
-#endif
 	rcu_read_unlock();
 }
+#endif
 
 int radix_tree_insert(struct radix_tree_root *, unsigned long, void *);
 void *radix_tree_lookup(struct radix_tree_root *, unsigned long);

-- 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2007-04-18 20:12 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-04-18 20:12 [PATCH 0/6] concurrent pagecache Peter Zijlstra
2007-04-18 20:12 ` [PATCH 1/6] radix-tree: concurrent write side support Peter Zijlstra
2007-04-18 20:12 ` [PATCH 2/6] mm/fs: abstract address_space::nrpages Peter Zijlstra
2007-04-18 20:12 ` [PATCH 3/6] mm: lock_page_ref Peter Zijlstra
2007-04-18 20:12 ` [PATCH 4/6] mm: concurrent pagecache write side Peter Zijlstra
2007-04-18 20:12 ` Peter Zijlstra [this message]
2007-04-18 20:12 ` [PATCH 6/6] debug: optimistic lock histogram Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20070418201606.090997479@chello.nl \
    --to=a.p.zijlstra@chello.nl \
    --cc=akpm@linux-foundation.org \
    --cc=clameter@sgi.com \
    --cc=linux-mm@kvack.org \
    --cc=npiggin@suse.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.