linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Matthew Wilcox <willy@infradead.org>
To: linux-fsdevel@vger.kernel.org, linux-mm@kvack.org,
	linux-kernel@vger.kernel.org
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Subject: [PATCH 17/62] xarray: Change definition of sibling entries
Date: Wed, 22 Nov 2017 13:06:54 -0800	[thread overview]
Message-ID: <20171122210739.29916-18-willy@infradead.org> (raw)
In-Reply-To: <20171122210739.29916-1-willy@infradead.org>

From: Matthew Wilcox <mawilcox@microsoft.com>

Instead of storing a pointer to the slot containing the canonical entry,
store the offset of the slot.  Produces slightly more efficient code
(~300 bytes) and simplifies the implementation.

Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 include/linux/xarray.h | 64 +++++++++++++++++++++++++++++++++++++++++++++++++
 lib/radix-tree.c       | 65 ++++++++++++++------------------------------------
 2 files changed, 82 insertions(+), 47 deletions(-)

diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index b1da8021b5fa..b9e0350b9e90 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -84,6 +84,8 @@ static inline bool xa_is_value(void *entry)
 	return (unsigned long)entry & 1;
 }
 
+/* Everything below here is the Advanced API.  Proceed with caution. */
+
 #define xa_trylock(xa)		spin_trylock(&(xa)->xa_lock)
 #define xa_lock(xa)		spin_lock(&(xa)->xa_lock)
 #define xa_unlock(xa)		spin_unlock(&(xa)->xa_lock)
@@ -97,4 +99,66 @@ static inline bool xa_is_value(void *entry)
 				spin_unlock_irqrestore(&(xa)->xa_lock, flags)
 #define xa_lock_held(xa)	lockdep_is_held(&(xa)->xa_lock)
 
+/*
+ * The xarray is constructed out of a set of 'chunks' of pointers.  Choosing
+ * the best chunk size requires some tradeoffs.  A power of two recommends
+ * itself so that we can walk the tree based purely on shifts and masks.
+ * Generally, the larger the better; as the number of slots per level of the
+ * tree increases, the less tall the tree needs to be.  But that needs to be
+ * balanced against the memory consumption of each node.  On a 64-bit system,
+ * xa_node is currently 576 bytes, and we get 7 of them per 4kB page.  If we
+ * doubled the number of slots per node, we'd get only 3 nodes per 4kB page.
+ */
+#ifndef XA_CHUNK_SHIFT
+#define XA_CHUNK_SHIFT		(CONFIG_BASE_SMALL ? 4 : 6)
+#endif
+#define XA_CHUNK_SIZE		(1UL << XA_CHUNK_SHIFT)
+#define XA_CHUNK_MASK		(XA_CHUNK_SIZE - 1)
+
+/*
+ * Internal entries have the bottom two bits set to the value 10b.  Most
+ * internal entries are pointers to the next node in the tree.  Since the
+ * kernel unmaps page 0 to trap NULL pointer dereferences, we can store up
+ * to 1024 distinct values in the tree.  Values 0-62 are used for sibling
+ * entries.  The retry entry is value 256.
+ */
+static inline void *xa_mk_internal(unsigned long v)
+{
+	return (void *)((v << 2) | 2);
+}
+
+static inline unsigned long xa_to_internal(void *entry)
+{
+	return (unsigned long)entry >> 2;
+}
+
+static inline bool xa_is_internal(void *entry)
+{
+	return ((unsigned long)entry & 3) == 2;
+}
+
+static inline bool xa_is_node(void *entry)
+{
+	return xa_is_internal(entry) && (unsigned long)entry > 4096;
+}
+
+static inline void *xa_mk_sibling(unsigned int offset)
+{
+	return xa_mk_internal(offset);
+}
+
+static inline unsigned long xa_to_sibling(void *entry)
+{
+	return xa_to_internal(entry);
+}
+
+static inline bool xa_is_sibling(void *entry)
+{
+	return IS_ENABLED(CONFIG_RADIX_TREE_MULTIORDER) &&
+		xa_is_internal(entry) &&
+		(entry < xa_mk_sibling(XA_CHUNK_SIZE - 1));
+}
+
+#define XA_RETRY_ENTRY		xa_mk_internal(256)
+
 #endif /* _LINUX_XARRAY_H */
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 30e49b89aa3b..4a1091e31932 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -37,6 +37,7 @@
 #include <linux/rcupdate.h>
 #include <linux/slab.h>
 #include <linux/string.h>
+#include <linux/xarray.h>
 
 
 /* Number of nodes in fully populated tree of given height */
@@ -97,24 +98,7 @@ static inline void *node_to_entry(void *ptr)
 	return (void *)((unsigned long)ptr | RADIX_TREE_INTERNAL_NODE);
 }
 
-#define RADIX_TREE_RETRY	node_to_entry(NULL)
-
-#ifdef CONFIG_RADIX_TREE_MULTIORDER
-/* Sibling slots point directly to another slot in the same node */
-static inline
-bool is_sibling_entry(const struct radix_tree_node *parent, void *node)
-{
-	void __rcu **ptr = node;
-	return (parent->slots <= ptr) &&
-			(ptr < parent->slots + RADIX_TREE_MAP_SIZE);
-}
-#else
-static inline
-bool is_sibling_entry(const struct radix_tree_node *parent, void *node)
-{
-	return false;
-}
-#endif
+#define RADIX_TREE_RETRY	XA_RETRY_ENTRY
 
 static inline unsigned long
 get_slot_offset(const struct radix_tree_node *parent, void __rcu **slot)
@@ -128,16 +112,10 @@ static unsigned int radix_tree_descend(const struct radix_tree_node *parent,
 	unsigned int offset = (index >> parent->shift) & RADIX_TREE_MAP_MASK;
 	void __rcu **entry = rcu_dereference_raw(parent->slots[offset]);
 
-#ifdef CONFIG_RADIX_TREE_MULTIORDER
-	if (radix_tree_is_internal_node(entry)) {
-		if (is_sibling_entry(parent, entry)) {
-			void __rcu **sibentry;
-			sibentry = (void __rcu **) entry_to_node(entry);
-			offset = get_slot_offset(parent, sibentry);
-			entry = rcu_dereference_raw(*sibentry);
-		}
+	if (xa_is_sibling(entry)) {
+		offset = xa_to_sibling(entry);
+		entry = rcu_dereference_raw(parent->slots[offset]);
 	}
-#endif
 
 	*nodep = (void *)entry;
 	return offset;
@@ -299,10 +277,10 @@ static void dump_node(struct radix_tree_node *node, unsigned long index)
 		} else if (!radix_tree_is_internal_node(entry)) {
 			pr_debug("radix entry %p offset %ld indices %lu-%lu parent %p\n",
 					entry, i, first, last, node);
-		} else if (is_sibling_entry(node, entry)) {
+		} else if (xa_is_sibling(entry)) {
 			pr_debug("radix sblng %p offset %ld indices %lu-%lu parent %p val %p\n",
 					entry, i, first, last, node,
-					*(void **)entry_to_node(entry));
+					node->slots[xa_to_sibling(entry)]);
 		} else {
 			dump_node(entry_to_node(entry), first);
 		}
@@ -872,8 +850,7 @@ static void radix_tree_free_nodes(struct radix_tree_node *node)
 
 	for (;;) {
 		void *entry = rcu_dereference_raw(child->slots[offset]);
-		if (radix_tree_is_internal_node(entry) &&
-					!is_sibling_entry(child, entry)) {
+		if (xa_is_node(entry)) {
 			child = entry_to_node(entry);
 			offset = 0;
 			continue;
@@ -895,7 +872,7 @@ static void radix_tree_free_nodes(struct radix_tree_node *node)
 static inline int insert_entries(struct radix_tree_node *node,
 		void __rcu **slot, void *item, unsigned order, bool replace)
 {
-	struct radix_tree_node *child;
+	void *sibling;
 	unsigned i, n, tag, offset, tags = 0;
 
 	if (node) {
@@ -913,7 +890,7 @@ static inline int insert_entries(struct radix_tree_node *node,
 		offset = offset & ~(n - 1);
 		slot = &node->slots[offset];
 	}
-	child = node_to_entry(slot);
+	sibling = xa_mk_sibling(offset);
 
 	for (i = 0; i < n; i++) {
 		if (slot[i]) {
@@ -930,7 +907,7 @@ static inline int insert_entries(struct radix_tree_node *node,
 	for (i = 0; i < n; i++) {
 		struct radix_tree_node *old = rcu_dereference_raw(slot[i]);
 		if (i) {
-			rcu_assign_pointer(slot[i], child);
+			rcu_assign_pointer(slot[i], sibling);
 			for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
 				if (tags & (1 << tag))
 					tag_clear(node, tag, offset + i);
@@ -940,9 +917,7 @@ static inline int insert_entries(struct radix_tree_node *node,
 				if (tags & (1 << tag))
 					tag_set(node, tag, offset);
 		}
-		if (radix_tree_is_internal_node(old) &&
-					!is_sibling_entry(node, old) &&
-					(old != RADIX_TREE_RETRY))
+		if (xa_is_node(old))
 			radix_tree_free_nodes(old);
 		if (xa_is_value(old))
 			node->exceptional--;
@@ -1101,10 +1076,10 @@ static inline void replace_sibling_entries(struct radix_tree_node *node,
 				void __rcu **slot, int count, int exceptional)
 {
 #ifdef CONFIG_RADIX_TREE_MULTIORDER
-	void *ptr = node_to_entry(slot);
-	unsigned offset = get_slot_offset(node, slot) + 1;
+	unsigned offset = get_slot_offset(node, slot);
+	void *ptr = xa_mk_sibling(offset);
 
-	while (offset < RADIX_TREE_MAP_SIZE) {
+	while (++offset < RADIX_TREE_MAP_SIZE) {
 		if (rcu_dereference_raw(node->slots[offset]) != ptr)
 			break;
 		if (count < 0) {
@@ -1112,7 +1087,6 @@ static inline void replace_sibling_entries(struct radix_tree_node *node,
 			node->count--;
 		}
 		node->exceptional += exceptional;
-		offset++;
 	}
 #endif
 }
@@ -1311,8 +1285,7 @@ int radix_tree_split(struct radix_tree_root *root, unsigned long index,
 			tags |= 1 << tag;
 
 	for (end = offset + 1; end < RADIX_TREE_MAP_SIZE; end++) {
-		if (!is_sibling_entry(parent,
-				rcu_dereference_raw(parent->slots[end])))
+		if (!xa_is_sibling(rcu_dereference_raw(parent->slots[end])))
 			break;
 		for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
 			if (tags & (1 << tag))
@@ -1608,11 +1581,9 @@ static void set_iter_tags(struct radix_tree_iter *iter,
 static void __rcu **skip_siblings(struct radix_tree_node **nodep,
 			void __rcu **slot, struct radix_tree_iter *iter)
 {
-	void *sib = node_to_entry(slot - 1);
-
 	while (iter->index < iter->next_index) {
 		*nodep = rcu_dereference_raw(*slot);
-		if (*nodep && *nodep != sib)
+		if (*nodep && !xa_is_sibling(*nodep))
 			return slot;
 		slot++;
 		iter->index = __radix_tree_iter_add(iter, 1);
@@ -1763,7 +1734,7 @@ void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root,
 				while (++offset	< RADIX_TREE_MAP_SIZE) {
 					void *slot = rcu_dereference_raw(
 							node->slots[offset]);
-					if (is_sibling_entry(node, slot))
+					if (xa_is_sibling(slot))
 						continue;
 					if (slot)
 						break;
-- 
2.15.0

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2017-11-22 21:06 UTC|newest]

Thread overview: 79+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-11-22 21:06 [PATCH 00/62] XArray November 2017 Edition Matthew Wilcox
2017-11-22 21:06 ` [PATCH 01/62] tools: Make __test_and_clear_bit available Matthew Wilcox
2017-11-22 21:06 ` [PATCH 02/62] radix tree test suite: Remove ARRAY_SIZE Matthew Wilcox
2017-11-22 21:06 ` [PATCH 03/62] radix tree test suite: Check reclaim bit Matthew Wilcox
2017-11-22 21:06 ` [PATCH 04/62] idr test suite: Fix ida_test_random() Matthew Wilcox
2017-11-22 21:06 ` [PATCH 05/62] radix tree: Add a missing cast to gfp_t Matthew Wilcox
2017-11-22 21:28   ` Luc Van Oostenryck
2017-11-22 22:24     ` Matthew Wilcox
2017-11-22 22:35       ` Luc Van Oostenryck
2017-11-22 21:06 ` [PATCH 06/62] idr: Make cursor explicit for cyclic allocation Matthew Wilcox
2017-11-22 21:06 ` [PATCH 07/62] idr: Rewrite extended IDR API Matthew Wilcox
2017-11-22 21:06 ` [PATCH 08/62] Explicitly include radix-tree.h Matthew Wilcox
2017-11-22 21:06 ` [PATCH 09/62] arm64: Turn flush_dcache_mmap_lock into a no-op Matthew Wilcox
2017-11-22 21:06 ` [PATCH 10/62] unicore32: " Matthew Wilcox
2017-11-22 21:06 ` [PATCH 11/62] Export __set_page_dirty Matthew Wilcox
2017-11-22 21:06 ` [PATCH 12/62] xfs: Rename xa_ elements to ail_ Matthew Wilcox
2017-11-22 21:06 ` [PATCH 13/62] fscache: Use appropriate radix tree accessors Matthew Wilcox
2017-11-22 21:06 ` [PATCH 14/62] xarray: Add the xa_lock to the radix_tree_root Matthew Wilcox
2017-11-22 21:06 ` [PATCH 15/62] page cache: Use xa_lock Matthew Wilcox
2017-11-22 21:06 ` [PATCH 16/62] xarray: Replace exceptional entries Matthew Wilcox
2017-11-22 21:06 ` Matthew Wilcox [this message]
2017-11-22 21:06 ` [PATCH 18/62] xarray: Add definition of struct xarray Matthew Wilcox
2017-11-22 21:06 ` [PATCH 19/62] xarray: Define struct xa_node Matthew Wilcox
2017-11-22 21:06 ` [PATCH 20/62] xarray: Add xa_load Matthew Wilcox
2017-11-22 21:06 ` [PATCH 21/62] xarray: Add xa_get_tag, xa_set_tag and xa_clear_tag Matthew Wilcox
2017-11-22 21:06 ` [PATCH 22/62] xarray: Add xa_store Matthew Wilcox
2017-11-22 21:07 ` [PATCH 23/62] xarray: Add xa_cmpxchg Matthew Wilcox
2017-11-22 21:07 ` [PATCH 24/62] xarray: Add xa_for_each Matthew Wilcox
2017-11-22 21:07 ` [PATCH 25/62] xarray: Add xa_init Matthew Wilcox
2017-11-22 21:07 ` [PATCH 26/62] xarray: Add xas_for_each_tag Matthew Wilcox
2017-11-22 21:07 ` [PATCH 27/62] xarray: Add xa_get_entries and xa_get_tagged Matthew Wilcox
2017-11-22 21:07 ` [PATCH 28/62] xarray: Add xa_destroy Matthew Wilcox
2017-11-22 21:07 ` [PATCH 29/62] xarray: Add xas_prev_any Matthew Wilcox
2017-11-22 21:07 ` [PATCH 30/62] xarray: Add xas_find_any / xas_next_any Matthew Wilcox
2017-11-22 21:07 ` [PATCH 31/62] Convert IDR to use xarray Matthew Wilcox
2017-11-22 21:07 ` [PATCH 32/62] ida: Convert to using xarray Matthew Wilcox
2017-11-22 21:07 ` [PATCH 33/62] page cache: Convert page_cache_next_hole to XArray Matthew Wilcox
2017-11-22 21:07 ` [PATCH 34/62] page cache: Use xarray for adding pages Matthew Wilcox
2017-11-22 21:07 ` [PATCH 35/62] page cache: Convert page_cache_tree_delete to xarray Matthew Wilcox
2017-11-22 21:07 ` [PATCH 36/62] page cache: Convert find_get_entry " Matthew Wilcox
2017-11-22 21:07 ` [PATCH 37/62] shmem: Convert replace " Matthew Wilcox
2017-11-22 21:07 ` [PATCH 38/62] shmem: Convert shmem_confirm_swap to XArray Matthew Wilcox
2017-11-22 21:07 ` [PATCH 39/62] shmem: Convert find_swap_entry " Matthew Wilcox
2017-11-22 21:07 ` [PATCH 40/62] shmem: Convert shmem_tag_pins " Matthew Wilcox
2017-11-22 21:07 ` [PATCH 41/62] shmem: Convert shmem_wait_for_pins " Matthew Wilcox
2017-11-22 21:07 ` [PATCH 42/62] vmalloc: Convert to xarray Matthew Wilcox
2017-11-22 21:07 ` [PATCH 43/62] brd: Convert to XArray Matthew Wilcox
2017-11-22 21:07 ` [PATCH 44/62] xfs: Convert m_perag_tree " Matthew Wilcox
2017-11-22 21:07 ` [PATCH 45/62] xfs: Convert pag_ici_root " Matthew Wilcox
2017-11-22 21:07 ` [PATCH 46/62] xfs: Convert xfs dquot " Matthew Wilcox
2017-11-22 21:07 ` [PATCH 47/62] xfs: Convert mru cache " Matthew Wilcox
2017-11-22 21:07 ` [PATCH 48/62] block: Remove IDR preloading Matthew Wilcox
2017-11-22 21:07 ` [PATCH 49/62] rxrpc: " Matthew Wilcox
2017-11-22 21:07 ` [PATCH 50/62] cgroup: Remove IDR wrappers Matthew Wilcox
2017-11-22 21:07 ` [PATCH 51/62] dca: Remove idr_preload calls Matthew Wilcox
2017-11-22 21:07 ` [PATCH 52/62] ipc: Remove call to idr_preload Matthew Wilcox
2017-11-22 21:07 ` [PATCH 53/62] irq: " Matthew Wilcox
2017-11-22 21:07 ` [PATCH 54/62] scsi: Remove idr_preload in st driver Matthew Wilcox
2017-11-22 21:07 ` [PATCH 55/62] firewire: Remove call to idr_preload Matthew Wilcox
2017-11-22 21:07 ` [PATCH 56/62] drm: Remove drm_minor_lock and idr_preload Matthew Wilcox
2017-11-22 21:07 ` [PATCH 57/62] drm: Remove drm_syncobj_fd_to_handle Matthew Wilcox
2017-11-22 21:07 ` [PATCH 58/62] drm: Remove qxl driver IDR locks Matthew Wilcox
2017-11-22 21:07 ` [PATCH 59/62] drm: Replace virtio IDRs with IDAs Matthew Wilcox
2017-11-22 21:07 ` [PATCH 60/62] drm: Replace vmwgfx " Matthew Wilcox
2017-11-22 21:07 ` [PATCH 61/62] net: Redesign act_api use of IDR Matthew Wilcox
2017-11-22 21:07 ` [PATCH 62/62] mm: Convert page-writeback to XArray Matthew Wilcox
2017-11-23  1:25 ` [PATCH 00/62] XArray November 2017 Edition Dave Chinner
2017-11-23  2:46   ` Matthew Wilcox
2017-11-24  1:16 ` XArray documentation Matthew Wilcox
2017-11-24  4:30   ` Andreas Dilger
2017-11-24 17:17     ` Matthew Wilcox
2017-11-24 16:50   ` Martin Steigerwald
2017-11-24 17:03     ` Matthew Wilcox
2017-11-24 18:01       ` Martin Steigerwald
2017-11-24 19:48         ` Shakeel Butt
2017-11-24 19:56           ` Matthew Wilcox
2017-11-24 21:18         ` Matthew Wilcox
2017-11-24 22:02           ` Martin Steigerwald
2017-11-24 22:08             ` Matthew Wilcox

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20171122210739.29916-18-willy@infradead.org \
    --to=willy@infradead.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mawilcox@microsoft.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).