From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
To: "Theodore Ts'o" <tytso@mit.edu>,
Andreas Dilger <adilger.kernel@dilger.ca>,
Jan Kara <jack@suse.com>,
Andrew Morton <akpm@linux-foundation.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>,
Hugh Dickins <hughd@google.com>,
Andrea Arcangeli <aarcange@redhat.com>,
Dave Hansen <dave.hansen@intel.com>,
Vlastimil Babka <vbabka@suse.cz>,
Matthew Wilcox <willy@infradead.org>,
Ross Zwisler <ross.zwisler@linux.intel.com>,
linux-ext4@vger.kernel.org, linux-fsdevel@vger.kernel.org,
linux-kernel@vger.kernel.org, linux-mm@kvack.org,
linux-block@vger.kernel.org,
Matthew Wilcox <willy@linux.intel.com>,
"Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>
Subject: [PATCHv3 04/41] radix-tree: Add radix_tree_split
Date: Thu, 15 Sep 2016 14:54:46 +0300 [thread overview]
Message-ID: <20160915115523.29737-5-kirill.shutemov@linux.intel.com> (raw)
In-Reply-To: <20160915115523.29737-1-kirill.shutemov@linux.intel.com>
From: Matthew Wilcox <willy@linux.intel.com>
This new function splits a larger multiorder entry into smaller entries
(potentially multi-order entries). These entries are initialised to
RADIX_TREE_RETRY to ensure that RCU walkers who see this state aren't
confused. The caller should then call radix_tree_for_each_slot() and
radix_tree_replace_slot() in order to turn these retry entries into the
intended new entries. Tags are replicated from the original multiorder
entry into each new entry.
Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
include/linux/radix-tree.h | 6 +-
lib/radix-tree.c | 109 ++++++++++++++++++++++++++++++++--
tools/testing/radix-tree/multiorder.c | 26 ++++++++
3 files changed, 135 insertions(+), 6 deletions(-)
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 75ae4648d13d..459e8a152c8a 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -280,8 +280,7 @@ bool __radix_tree_delete_node(struct radix_tree_root *root,
struct radix_tree_node *node);
void *radix_tree_delete_item(struct radix_tree_root *, unsigned long, void *);
void *radix_tree_delete(struct radix_tree_root *, unsigned long);
-struct radix_tree_node *radix_tree_replace_clear_tags(
- struct radix_tree_root *root,
+struct radix_tree_node *radix_tree_replace_clear_tags(struct radix_tree_root *,
unsigned long index, void *entry);
unsigned int radix_tree_gang_lookup(struct radix_tree_root *root,
void **results, unsigned long first_index,
@@ -319,8 +318,11 @@ static inline void radix_tree_preload_end(void)
preempt_enable();
}
+int radix_tree_split(struct radix_tree_root *, unsigned long index,
+ unsigned new_order);
int radix_tree_join(struct radix_tree_root *, unsigned long index,
unsigned new_order, void *);
+
/**
* struct radix_tree_iter - radix tree iterator state
*
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 3157f223c268..ad3116cbe61b 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -231,7 +231,10 @@ static void dump_node(struct radix_tree_node *node, unsigned long index)
void *entry = node->slots[i];
if (!entry)
continue;
- if (is_sibling_entry(node, entry)) {
+ if (entry == RADIX_TREE_RETRY) {
+ pr_debug("radix retry offset %ld indices %ld-%ld\n",
+ i, first, last);
+ } else if (is_sibling_entry(node, entry)) {
pr_debug("radix sblng %p offset %ld val %p indices %ld-%ld\n",
entry, i,
*(void **)entry_to_node(entry),
@@ -641,7 +644,10 @@ static inline int insert_entries(struct radix_tree_node *node, void **slot,
unsigned i, n, tag, offset, tags = 0;
if (node) {
- n = 1 << (order - node->shift);
+ if (order > node->shift)
+ n = 1 << (order - node->shift);
+ else
+ n = 1;
offset = get_slot_offset(node, slot);
} else {
n = 1;
@@ -680,7 +686,8 @@ static inline int insert_entries(struct radix_tree_node *node, void **slot,
tag_set(node, tag, offset);
}
if (radix_tree_is_internal_node(old) &&
- !is_sibling_entry(node, old))
+ !is_sibling_entry(node, old) &&
+ (old != RADIX_TREE_RETRY))
radix_tree_free_nodes(old);
}
if (node)
@@ -843,6 +850,98 @@ int radix_tree_join(struct radix_tree_root *root, unsigned long index,
return error;
}
+
+int radix_tree_split(struct radix_tree_root *root, unsigned long index,
+ unsigned order)
+{
+ struct radix_tree_node *parent, *node, *child;
+ void **slot;
+ unsigned int offset, end;
+ unsigned n, tag, tags = 0;
+
+ if (!__radix_tree_lookup(root, index, &parent, &slot))
+ return -ENOENT;
+ if (!parent)
+ return -ENOENT;
+
+ offset = get_slot_offset(parent, slot);
+
+ for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
+ if (tag_get(parent, tag, offset))
+ tags |= 1 << tag;
+
+ for (end = offset + 1; end < RADIX_TREE_MAP_SIZE; end++) {
+ if (!is_sibling_entry(parent, parent->slots[end]))
+ break;
+ for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
+ if (tags & (1 << tag))
+ tag_set(parent, tag, end);
+ /* tags must be set before RETRY is set */
+ rcu_assign_pointer(parent->slots[end], RADIX_TREE_RETRY);
+ }
+
+ if (order == parent->shift)
+ return 0;
+ if (order > parent->shift) {
+ while (offset < end)
+ offset += insert_entries(parent, &parent->slots[offset],
+ RADIX_TREE_RETRY, order, true);
+ return 0;
+ }
+
+ node = parent;
+
+ for (;;) {
+ if (node->shift > order) {
+ child = radix_tree_node_alloc(root);
+ if (!child)
+ goto nomem;
+ child->shift = node->shift - RADIX_TREE_MAP_SHIFT;
+ child->offset = offset;
+ child->count = 0;
+ child->parent = node;
+ if (node != parent) {
+ node->count++;
+ node->slots[offset] = node_to_entry(child);
+ for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
+ if (tags & (1 << tag))
+ tag_set(node, tag, offset);
+ }
+
+ node = child;
+ offset = 0;
+ continue;
+ }
+
+ n = insert_entries(node, &node->slots[offset],
+ RADIX_TREE_RETRY, order, false);
+ BUG_ON(n > RADIX_TREE_MAP_SIZE);
+
+ for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
+ if (tags & (1 << tag))
+ tag_set(node, tag, offset);
+ offset += n;
+
+ while (offset == RADIX_TREE_MAP_SIZE) {
+ if (node == parent)
+ break;
+ offset = node->offset;
+ child = node;
+ node = node->parent;
+ rcu_assign_pointer(node->slots[offset],
+ node_to_entry(child));
+ offset++;
+ }
+ if ((node == parent) && (offset == end))
+ return 0;
+ }
+
+ nomem:
+ /* Shouldn't happen; did user forget to preload? */
+ /* TODO: free all the allocated nodes */
+ WARN_ON(1);
+ return -ENOMEM;
+}
#endif
/**
@@ -1081,8 +1180,10 @@ void **radix_tree_next_chunk(struct radix_tree_root *root,
child = rcu_dereference_raw(node->slots[offset]);
}
- if ((child == NULL) || (child == RADIX_TREE_RETRY))
+ if (!child)
goto restart;
+ if (child == RADIX_TREE_RETRY)
+ break;
} while (radix_tree_is_internal_node(child));
/* Update the iterator state */
diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c
index f917da164b00..9d27a4dd7b2a 100644
--- a/tools/testing/radix-tree/multiorder.c
+++ b/tools/testing/radix-tree/multiorder.c
@@ -348,6 +348,31 @@ static void multiorder_join(void)
}
}
+static void __multiorder_split(int old_order, int new_order)
+{
+ RADIX_TREE(tree, GFP_KERNEL);
+ void **slot;
+ struct radix_tree_iter iter;
+
+ item_insert_order(&tree, 0, old_order);
+ radix_tree_tag_set(&tree, 0, 2);
+ radix_tree_split(&tree, 0, new_order);
+ radix_tree_for_each_slot(slot, &tree, &iter, 0) {
+ radix_tree_replace_slot(slot, item_create(iter.index));
+ }
+
+ item_kill_tree(&tree);
+}
+
+static void multiorder_split(void)
+{
+ int i, j;
+
+ for (i = 9; i < 19; i++)
+ for (j = 0; j < i; j++)
+ __multiorder_split(i, j);
+}
+
void multiorder_checks(void)
{
int i;
@@ -366,4 +391,5 @@ void multiorder_checks(void)
multiorder_iteration();
multiorder_tagged_iteration();
multiorder_join();
+ multiorder_split();
}
--
2.9.3
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2016-09-15 11:54 UTC|newest]
Thread overview: 75+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-09-15 11:54 [PATCHv3 00/41] ext4: support of huge pages Kirill A. Shutemov
2016-09-15 11:54 ` [PATCHv3 01/41] tools: Add WARN_ON_ONCE Kirill A. Shutemov
2016-09-15 11:54 ` [PATCHv3 02/41] radix tree test suite: Allow GFP_ATOMIC allocations to fail Kirill A. Shutemov
2016-09-15 11:54 ` [PATCHv3 03/41] radix-tree: Add radix_tree_join Kirill A. Shutemov
2016-09-15 11:54 ` Kirill A. Shutemov [this message]
2016-09-15 11:54 ` [PATCHv3 05/41] radix-tree: Add radix_tree_split_preload() Kirill A. Shutemov
2016-09-15 11:54 ` [PATCHv3 06/41] radix-tree: Handle multiorder entries being deleted by replace_clear_tags Kirill A. Shutemov
2016-09-15 11:54 ` [PATCHv3 07/41] mm, shmem: swich huge tmpfs to multi-order radix-tree entries Kirill A. Shutemov
2016-09-16 12:07 ` Kirill A. Shutemov
2016-09-15 11:54 ` [PATCHv3 08/41] Revert "radix-tree: implement radix_tree_maybe_preload_order()" Kirill A. Shutemov
2016-09-15 11:54 ` [PATCHv3 09/41] page-flags: relax page flag policy for few flags Kirill A. Shutemov
2016-09-15 11:54 ` [PATCHv3 10/41] mm, rmap: account file thp pages Kirill A. Shutemov
2016-09-15 11:54 ` [PATCHv3 11/41] thp: try to free page's buffers before attempt split Kirill A. Shutemov
2016-10-11 15:40 ` Jan Kara
2016-10-11 21:43 ` Kirill A. Shutemov
2016-09-15 11:54 ` [PATCHv3 12/41] thp: handle write-protection faults for file THP Kirill A. Shutemov
2016-10-11 15:47 ` Jan Kara
2016-10-11 21:47 ` Kirill A. Shutemov
2016-09-15 11:54 ` [PATCHv3 13/41] truncate: make sure invalidate_mapping_pages() can discard huge pages Kirill A. Shutemov
2016-10-11 15:58 ` Jan Kara
2016-10-11 21:53 ` Kirill A. Shutemov
2016-10-12 6:43 ` Jan Kara
2016-10-24 10:41 ` Kirill A. Shutemov
2016-09-15 11:54 ` [PATCHv3 14/41] filemap: allocate huge page in page_cache_read(), if allowed Kirill A. Shutemov
2016-10-11 16:15 ` Jan Kara
2016-10-11 21:57 ` Kirill A. Shutemov
2016-09-15 11:54 ` [PATCHv3 15/41] filemap: handle huge pages in do_generic_file_read() Kirill A. Shutemov
2016-10-13 9:33 ` Jan Kara
2016-10-31 18:10 ` Kirill A. Shutemov
2016-11-01 16:39 ` Jan Kara
2016-11-02 8:32 ` Kirill A. Shutemov
2016-11-02 14:37 ` Christoph Hellwig
2016-11-03 20:40 ` Jan Kara
2016-11-07 11:07 ` Kirill A. Shutemov
2016-11-07 14:59 ` Christoph Hellwig
2016-11-02 14:36 ` Christoph Hellwig
2016-11-03 17:56 ` Jan Kara
2016-11-07 11:13 ` Kirill A. Shutemov
2016-11-07 15:01 ` Christoph Hellwig
2016-11-07 16:03 ` Kirill A. Shutemov
2016-09-15 11:54 ` [PATCHv3 16/41] filemap: allocate huge page in pagecache_get_page(), if allowed Kirill A. Shutemov
2016-09-15 11:54 ` [PATCHv3 17/41] filemap: handle huge pages in filemap_fdatawait_range() Kirill A. Shutemov
2016-10-13 9:44 ` Jan Kara
2016-10-13 12:08 ` Kirill A. Shutemov
2016-10-13 13:18 ` Jan Kara
2016-10-24 11:36 ` Kirill A. Shutemov
2016-10-30 17:31 ` Jan Kara
2016-09-15 11:55 ` [PATCHv3 18/41] HACK: readahead: alloc huge pages, if allowed Kirill A. Shutemov
2016-09-15 11:55 ` [PATCHv3 19/41] block: define BIO_MAX_PAGES to HPAGE_PMD_NR if huge page cache enabled Kirill A. Shutemov
2016-09-16 12:09 ` Kirill A. Shutemov
2016-09-15 11:55 ` [PATCHv3 20/41] mm: make write_cache_pages() work on huge pages Kirill A. Shutemov
2016-09-15 11:55 ` [PATCHv3 21/41] thp: introduce hpage_size() and hpage_mask() Kirill A. Shutemov
2016-09-15 11:55 ` [PATCHv3 22/41] thp: do not threat slab pages as huge in hpage_{nr_pages,size,mask} Kirill A. Shutemov
2016-09-15 11:55 ` [PATCHv3 23/41] fs: make block_read_full_page() be able to read huge page Kirill A. Shutemov
2016-09-15 11:55 ` [PATCHv3 24/41] fs: make block_write_{begin,end}() be able to handle huge pages Kirill A. Shutemov
2016-09-15 11:55 ` [PATCHv3 25/41] fs: make block_page_mkwrite() aware about " Kirill A. Shutemov
2016-09-15 11:55 ` [PATCHv3 26/41] truncate: make truncate_inode_pages_range() " Kirill A. Shutemov
2016-09-15 11:55 ` [PATCHv3 27/41] truncate: make invalidate_inode_pages2_range() " Kirill A. Shutemov
2016-09-15 11:55 ` [PATCHv3 28/41] mm, hugetlb: switch hugetlbfs to multi-order radix-tree entries Kirill A. Shutemov
2016-09-15 11:55 ` [PATCHv3 29/41] ext4: make ext4_mpage_readpages() hugepage-aware Kirill A. Shutemov
2016-09-15 12:27 ` Andreas Dilger
2016-09-16 12:17 ` Kirill A. Shutemov
2016-09-16 12:10 ` Kirill A. Shutemov
2016-09-15 11:55 ` [PATCHv3 30/41] ext4: make ext4_writepage() work on huge pages Kirill A. Shutemov
2016-09-15 11:55 ` [PATCHv3 31/41] ext4: handle huge pages in ext4_page_mkwrite() Kirill A. Shutemov
2016-09-15 11:55 ` [PATCHv3 32/41] ext4: handle huge pages in __ext4_block_zero_page_range() Kirill A. Shutemov
2016-09-15 11:55 ` [PATCHv3 33/41] ext4: make ext4_block_write_begin() aware about huge pages Kirill A. Shutemov
2016-09-15 11:55 ` [PATCHv3 34/41] ext4: handle huge pages in ext4_da_write_end() Kirill A. Shutemov
2016-09-15 11:55 ` [PATCHv3 35/41] ext4: make ext4_da_page_release_reservation() aware about huge pages Kirill A. Shutemov
2016-09-15 11:55 ` [PATCHv3 36/41] ext4: handle writeback with " Kirill A. Shutemov
2016-09-15 11:55 ` [PATCHv3 37/41] ext4: make EXT4_IOC_MOVE_EXT work " Kirill A. Shutemov
2016-09-15 11:55 ` [PATCHv3 38/41] ext4: fix SEEK_DATA/SEEK_HOLE for " Kirill A. Shutemov
2016-09-15 11:55 ` [PATCHv3 39/41] ext4: make fallocate() operations work with " Kirill A. Shutemov
2016-09-15 11:55 ` [PATCHv3 40/41] mm, fs, ext4: expand use of page_mapping() and page_to_pgoff() Kirill A. Shutemov
2016-09-15 11:55 ` [PATCHv3 41/41] ext4, vfs: add huge= mount option Kirill A. Shutemov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20160915115523.29737-5-kirill.shutemov@linux.intel.com \
--to=kirill.shutemov@linux.intel.com \
--cc=aarcange@redhat.com \
--cc=adilger.kernel@dilger.ca \
--cc=akpm@linux-foundation.org \
--cc=dave.hansen@intel.com \
--cc=hughd@google.com \
--cc=jack@suse.com \
--cc=linux-block@vger.kernel.org \
--cc=linux-ext4@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=ross.zwisler@linux.intel.com \
--cc=tytso@mit.edu \
--cc=vbabka@suse.cz \
--cc=viro@zeniv.linux.org.uk \
--cc=willy@infradead.org \
--cc=willy@linux.intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).