From: Pranjal Arya <pranjal.arya@oss.qualcomm.com>
To: Andrew Morton <akpm@linux-foundation.org>,
Uladzislau Rezki <urezki@gmail.com>,
"Liam R. Howlett" <liam@infradead.org>,
Alice Ryhl <aliceryhl@google.com>,
Andrew Ballance <andrewjballance@gmail.com>
Cc: linux-arm-msm@vger.kernel.org, linux-mm@kvack.org,
linux-kernel@vger.kernel.org, maple-tree@lists.infradead.org,
Lorenzo Stoakes <ljs@kernel.org>,
Pranjal Shrivastava <praan@google.com>,
Will Deacon <will@kernel.org>,
Suzuki K Poulose <Suzuki.Poulose@arm.com>,
Neil Armstrong <neil.armstrong@linaro.org>,
Mostafa Saleh <smostafa@google.com>,
Balbir Singh <balbirs@nvidia.com>,
Suren Baghdasaryan <surenb@google.com>,
Marco Elver <elver@google.com>,
Dmitry Vyukov <dvyukov@google.com>,
Alexander Potapenko <glider@google.com>,
Shuah Khan <shuah@kernel.org>, Dev Jain <dev.jain@arm.com>,
Brendan Jackman <jackmanb@google.com>,
Puranjay Mohan <puranjay@kernel.org>,
Santosh Shukla <santosh.shukla@amd.com>,
Wyes Karny <wkarny@gmail.com>,
Pranjal Arya <pranjal.arya@oss.qualcomm.com>,
Sudeep Holla <sudeep.holla@kernel.org>
Subject: [PATCH RFC 01/12] mm/vmalloc: introduce maple_tree-based indexing for vmap_area
Date: Sat, 13 Jun 2026 22:49:43 +0530 [thread overview]
Message-ID: <20260613-vmalloc_maple-v1-1-0aa740bb944b@oss.qualcomm.com> (raw)
In-Reply-To: <20260613-vmalloc_maple-v1-0-0aa740bb944b@oss.qualcomm.com>
Add the maple_tree data structures, helper API, and runtime readiness
plumbing that this series uses to retire the augmented rb_tree
indexing of vmalloc free and busy ranges.
Two new tree handles are added alongside the existing per-node lazy
index:
- free_vmap_area_mt address-keyed gap query for the global
free-area allocator
- vn->busy.mt per-node address-keyed lookup for find/free
Helpers follow a try_init_*_locked / *_store_*_locked / *_erase_*_locked
naming convention so that the conversion call sites read uniformly.
The try_init_* helpers fold one-shot allocation of the maple-tree
backing state into the first hot-path access; this keeps vmalloc_init()
free of the per-tree GFP_NOWAIT paths and lets the tree machinery
start cold.
No external vmalloc behaviour change in this step. free/busy/lazy
operations still go through the rb_tree and per-node lazy.mt; the new
helpers and globals are wired up by the conversion patches that
follow.
Signed-off-by: Pranjal Arya <pranjal.arya@oss.qualcomm.com>
---
mm/vmalloc.c | 433 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 402 insertions(+), 31 deletions(-)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 1afca3568b9b..67f753d04c96 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -24,6 +24,7 @@
#include <linux/list.h>
#include <linux/notifier.h>
#include <linux/rbtree.h>
+#include <linux/maple_tree.h>
#include <linux/xarray.h>
#include <linux/io.h>
#include <linux/rcupdate.h>
@@ -880,22 +881,22 @@ static bool vmap_initialized __read_mostly;
static struct kmem_cache *vmap_area_cachep;
/*
- * This linked list is used in pair with free_vmap_area_root.
- * It gives O(1) access to prev/next to perform fast coalescing.
+ * This linked list stores free areas sorted by start address.
+ * It gives O(1) access to neighbors for fast coalescing.
*/
static LIST_HEAD(free_vmap_area_list);
+/* Next-fit hint to avoid scanning from list head on each allocation. */
+static unsigned long free_vmap_alloc_hint __maybe_unused = 1;
/*
- * This augment red-black tree represents the free vmap space.
- * All vmap_area objects in this tree are sorted by va->va_start
- * address. It is used for allocation and merging when a vmap
- * object is released.
- *
- * Each vmap_area node contains a maximum available free block
- * of its sub-tree, right or left. Therefore it is possible to
- * find a lowest match of free area.
+ * Maple tree shadow of free_vmap_area_list. It is used for
+ * address lookups and first-fit scans.
*/
static struct rb_root free_vmap_area_root = RB_ROOT;
+static struct maple_tree free_vmap_area_mt __maybe_unused =
+ MTREE_INIT_EXT(free_vmap_area_mt, MT_FLAGS_LOCK_EXTERN, free_vmap_area_lock);
+static bool free_vmap_area_mt_enabled __maybe_unused;
+static bool free_vmap_area_mt_init_tried __maybe_unused;
/*
* Preload a CPU with one object for "no edge" split case. The
@@ -906,14 +907,17 @@ static DEFINE_PER_CPU(struct vmap_area *, ne_fit_preload_node);
/*
* This structure defines a single, solid model where a list and
- * rb-tree are part of one entity protected by the lock. Nodes are
+ * maple tree are part of one entity protected by the lock. Nodes are
* sorted in ascending order, thus for O(1) access to left/right
* neighbors a list is used as well as for sequential traversal.
*/
-struct rb_list {
+struct mt_list {
struct rb_root root;
+ struct maple_tree mt;
struct list_head head;
spinlock_t lock;
+ bool mt_enabled;
+ bool mt_init_tried;
};
/*
@@ -940,8 +944,8 @@ static struct vmap_node {
bool skip_populate;
/* Bookkeeping data of this node. */
- struct rb_list busy;
- struct rb_list lazy;
+ struct mt_list busy;
+ struct mt_list lazy;
/*
* Ready-to-free areas.
@@ -1051,6 +1055,10 @@ va_size(struct vmap_area *va)
return (va->va_end - va->va_start);
}
+/*
+ * Transitional rb compatibility retained until all rb-only users are moved.
+ * Follow-up patches in this RFC series remove these helpers.
+ */
static __always_inline unsigned long
get_subtree_max_size(struct rb_node *node)
{
@@ -1070,6 +1078,130 @@ static DECLARE_WORK(drain_vmap_work, drain_vmap_area_work);
static __cacheline_aligned_in_smp atomic_long_t vmap_lazy_nr;
+/*
+ * maple nodes are allocated from slab; defer tree population until
+ * slab allocator is up to avoid early-boot failures.
+ */
+static __always_inline bool vmap_mt_runtime_ready(void)
+{
+ return READ_ONCE(vmap_initialized) && slab_is_available();
+}
+
+static __always_inline bool free_mt_supported(void)
+{
+ return free_vmap_area_mt_enabled;
+}
+
+static __always_inline void disable_free_mt_locked(void)
+{
+ lockdep_assert_held(&free_vmap_area_lock);
+
+ if (free_vmap_area_mt_enabled) {
+ __mt_destroy(&free_vmap_area_mt);
+ free_vmap_area_mt_enabled = false;
+ }
+}
+
+static __always_inline void free_mt_store_va_locked(struct vmap_area *va)
+{
+ int err;
+
+ lockdep_assert_held(&free_vmap_area_lock);
+
+ MA_STATE(mas, &free_vmap_area_mt, va->va_start, va->va_end - 1);
+
+ err = mas_store_gfp(&mas, va, GFP_ATOMIC | __GFP_NOWARN);
+ if (WARN_ON_ONCE(err))
+ disable_free_mt_locked();
+}
+
+static __always_inline void free_mt_erase_va_locked(struct vmap_area *va)
+{
+ int err;
+
+ lockdep_assert_held(&free_vmap_area_lock);
+
+ MA_STATE(mas, &free_vmap_area_mt, va->va_start, va->va_end - 1);
+
+ err = mas_store_gfp(&mas, NULL, GFP_ATOMIC | __GFP_NOWARN);
+ if (WARN_ON_ONCE(err))
+ disable_free_mt_locked();
+}
+
+static __always_inline void
+free_mt_update_va_locked(struct vmap_area *va, unsigned long old_start,
+ unsigned long old_end)
+{
+ int err;
+
+ lockdep_assert_held(&free_vmap_area_lock);
+
+ MA_STATE(mas_erase, &free_vmap_area_mt, old_start, old_end - 1);
+ MA_STATE(mas_store, &free_vmap_area_mt, va->va_start, va->va_end - 1);
+
+ err = mas_store_gfp(&mas_erase, NULL, GFP_ATOMIC | __GFP_NOWARN);
+ if (WARN_ON_ONCE(err)) {
+ disable_free_mt_locked();
+ return;
+ }
+
+ err = mas_store_gfp(&mas_store, va, GFP_ATOMIC | __GFP_NOWARN);
+ if (WARN_ON_ONCE(err))
+ disable_free_mt_locked();
+}
+
+static void free_mt_rebuild_locked(void)
+{
+ struct vmap_area *va;
+ int err;
+
+ lockdep_assert_held(&free_vmap_area_lock);
+
+ __mt_destroy(&free_vmap_area_mt);
+ free_vmap_area_mt_enabled = true;
+
+ list_for_each_entry(va, &free_vmap_area_list, list) {
+ MA_STATE(mas, &free_vmap_area_mt, va->va_start, va->va_end - 1);
+
+ err = mas_store_gfp(&mas, va, GFP_ATOMIC | __GFP_NOWARN);
+ if (WARN_ON_ONCE(err)) {
+ disable_free_mt_locked();
+ return;
+ }
+ }
+}
+
+static __always_inline void try_init_free_mt_locked(void)
+{
+ lockdep_assert_held(&free_vmap_area_lock);
+
+ if (free_vmap_area_mt_init_tried)
+ return;
+
+ if (!vmap_mt_runtime_ready())
+ return;
+
+ free_vmap_area_mt_init_tried = true;
+ free_mt_rebuild_locked();
+}
+
+static __always_inline struct vmap_area *
+__find_vmap_area_list(unsigned long addr, struct list_head *head)
+{
+ struct vmap_area *va;
+
+ addr = (unsigned long)kasan_reset_tag((void *)addr);
+
+ list_for_each_entry(va, head, list) {
+ if (addr < va->va_start)
+ break;
+ if (addr < va->va_end)
+ return va;
+ }
+
+ return NULL;
+}
+
static struct vmap_area *__find_vmap_area(unsigned long addr, struct rb_root *root)
{
struct rb_node *n = root->rb_node;
@@ -1092,29 +1224,268 @@ static struct vmap_area *__find_vmap_area(unsigned long addr, struct rb_root *ro
}
/* Look up the first VA which satisfies addr < va_end, NULL if none. */
-static struct vmap_area *
-__find_vmap_area_exceed_addr(unsigned long addr, struct rb_root *root)
+static __always_inline struct vmap_area *
+__find_vmap_area_exceed_addr_list(unsigned long addr, struct list_head *head)
{
- struct vmap_area *va = NULL;
- struct rb_node *n = root->rb_node;
+ struct vmap_area *va;
addr = (unsigned long)kasan_reset_tag((void *)addr);
- while (n) {
- struct vmap_area *tmp;
+ list_for_each_entry(va, head, list) {
+ if (va->va_end > addr)
+ return va;
+ }
- tmp = rb_entry(n, struct vmap_area, rb_node);
- if (tmp->va_end > addr) {
- va = tmp;
- if (tmp->va_start <= addr)
- break;
+ return NULL;
+}
- n = n->rb_left;
- } else
- n = n->rb_right;
+static __always_inline struct list_head *
+find_vmap_area_insert_point_list(struct vmap_area *va, struct list_head *head)
+{
+ struct vmap_area *tmp;
+ struct list_head *next = head;
+
+ list_for_each_entry(tmp, head, list) {
+ if (tmp->va_start > va->va_start) {
+ next = &tmp->list;
+ break;
+ }
}
- return va;
+ if (next != head) {
+ tmp = list_entry(next, struct vmap_area, list);
+ if (WARN_ON_ONCE(va->va_end > tmp->va_start))
+ return NULL;
+ }
+
+ if (next->prev != head) {
+ tmp = list_entry(next->prev, struct vmap_area, list);
+ if (WARN_ON_ONCE(va->va_start < tmp->va_end))
+ return NULL;
+ }
+
+ return next;
+}
+
+/*
+ * Use maple-tree neighbour lookup to locate insertion point in O(log n),
+ * while preserving sorted-list neighbour traversal.
+ */
+static __always_inline struct list_head *
+find_vmap_area_insert_point_mt(struct vmap_area *va, struct maple_tree *tree,
+ struct list_head *head)
+{
+ struct vmap_area *prev, *next;
+ struct list_head *next_link;
+
+ MA_STATE(mas, tree, va->va_start, va->va_start);
+
+ mas_set(&mas, va->va_start);
+ next = mas_find(&mas, ULONG_MAX);
+
+ if (next) {
+ if (WARN_ON_ONCE(next->va_start <= va->va_start))
+ return NULL;
+ if (WARN_ON_ONCE(va->va_end > next->va_start))
+ return NULL;
+ next_link = &next->list;
+ } else {
+ next_link = head;
+ }
+
+ if (next_link->prev != head) {
+ prev = list_entry(next_link->prev, struct vmap_area, list);
+ if (WARN_ON_ONCE(va->va_start < prev->va_end))
+ return NULL;
+ }
+
+ return next_link;
+}
+
+static __always_inline bool
+insert_vmap_area_list_sorted(struct vmap_area *va, struct list_head *head)
+{
+ struct list_head *next;
+
+ next = find_vmap_area_insert_point_list(va, head);
+ if (!next)
+ return false;
+
+ list_add_tail(&va->list, next);
+ return true;
+}
+
+static __always_inline bool
+insert_vmap_area_list_sorted_mt(struct vmap_area *va, struct maple_tree *tree,
+ struct list_head *head)
+{
+ struct list_head *next;
+
+ next = find_vmap_area_insert_point_mt(va, tree, head);
+ if (!next)
+ return false;
+
+ list_add_tail(&va->list, next);
+ return true;
+}
+
+static __always_inline void
+disable_busy_mt_locked(struct vmap_node *vn)
+{
+ lockdep_assert_held(&vn->busy.lock);
+
+ if (vn->busy.mt_enabled) {
+ __mt_destroy(&vn->busy.mt);
+ vn->busy.mt_enabled = false;
+ }
+
+ vn->busy.mt_init_tried = true;
+}
+
+static __always_inline void
+disable_lazy_mt_locked(struct vmap_node *vn)
+{
+ lockdep_assert_held(&vn->lazy.lock);
+
+ if (vn->lazy.mt_enabled) {
+ __mt_destroy(&vn->lazy.mt);
+ vn->lazy.mt_enabled = false;
+ }
+
+ vn->lazy.mt_init_tried = true;
+}
+
+static void
+busy_mt_rebuild_locked(struct vmap_node *vn)
+{
+ struct vmap_area *va;
+ int err;
+
+ lockdep_assert_held(&vn->busy.lock);
+
+ __mt_destroy(&vn->busy.mt);
+ vn->busy.mt_enabled = true;
+
+ list_for_each_entry(va, &vn->busy.head, list) {
+ MA_STATE(mas, &vn->busy.mt, va->va_start, va->va_end - 1);
+
+ err = mas_store_gfp(&mas, va, GFP_ATOMIC | __GFP_NOWARN);
+ if (WARN_ON_ONCE(err)) {
+ disable_busy_mt_locked(vn);
+ return;
+ }
+ }
+}
+
+static __always_inline void
+try_init_busy_mt_locked(struct vmap_node *vn)
+{
+ lockdep_assert_held(&vn->busy.lock);
+
+ if (vn->busy.mt_init_tried)
+ return;
+
+ if (!vmap_mt_runtime_ready())
+ return;
+
+ vn->busy.mt_init_tried = true;
+ busy_mt_rebuild_locked(vn);
+}
+
+static void
+lazy_mt_rebuild_locked(struct vmap_node *vn)
+{
+ struct vmap_area *va;
+ int err;
+
+ lockdep_assert_held(&vn->lazy.lock);
+
+ __mt_destroy(&vn->lazy.mt);
+ vn->lazy.mt_enabled = true;
+
+ list_for_each_entry(va, &vn->lazy.head, list) {
+ MA_STATE(mas, &vn->lazy.mt, va->va_start, va->va_end - 1);
+
+ err = mas_store_gfp(&mas, va, GFP_ATOMIC | __GFP_NOWARN);
+ if (WARN_ON_ONCE(err)) {
+ disable_lazy_mt_locked(vn);
+ return;
+ }
+ }
+}
+
+static __always_inline void
+try_init_lazy_mt_locked(struct vmap_node *vn)
+{
+ lockdep_assert_held(&vn->lazy.lock);
+
+ if (vn->lazy.mt_init_tried)
+ return;
+
+ if (!vmap_mt_runtime_ready())
+ return;
+
+ vn->lazy.mt_init_tried = true;
+ lazy_mt_rebuild_locked(vn);
+}
+
+static __always_inline struct vmap_area *
+__find_vmap_area_mt(unsigned long addr, struct maple_tree *tree)
+{
+ MA_STATE(mas, tree, addr, addr);
+
+ addr = (unsigned long)kasan_reset_tag((void *)addr);
+ mas_set(&mas, addr);
+
+ return mas_walk(&mas);
+}
+
+static __always_inline struct vmap_area *
+__find_vmap_area_exceed_addr_mt(unsigned long addr, struct maple_tree *tree)
+{
+ MA_STATE(mas, tree, addr, addr);
+
+ addr = (unsigned long)kasan_reset_tag((void *)addr);
+ mas_set(&mas, addr);
+
+ return mas_find(&mas, ULONG_MAX);
+}
+
+static __always_inline struct vmap_area *
+__find_vmap_area_enclose_addr_mt(unsigned long addr, struct maple_tree *tree)
+{
+ MA_STATE(mas, tree, addr, addr);
+
+ addr = (unsigned long)kasan_reset_tag((void *)addr);
+ mas_set(&mas, addr);
+
+ return mas_find_rev(&mas, 0);
+}
+
+static __always_inline struct vmap_area *
+find_vmap_area_busy_locked(unsigned long addr, struct vmap_node *vn)
+{
+ lockdep_assert_held(&vn->busy.lock);
+
+ try_init_busy_mt_locked(vn);
+
+ if (likely(vn->busy.mt_enabled))
+ return __find_vmap_area_mt(addr, &vn->busy.mt);
+
+ return __find_vmap_area_list(addr, &vn->busy.head);
+}
+
+static __always_inline struct vmap_area *
+find_vmap_area_exceed_addr_busy_locked(unsigned long addr, struct vmap_node *vn)
+{
+ lockdep_assert_held(&vn->busy.lock);
+
+ try_init_busy_mt_locked(vn);
+
+ if (likely(vn->busy.mt_enabled))
+ return __find_vmap_area_exceed_addr_mt(addr, &vn->busy.mt);
+
+ return __find_vmap_area_exceed_addr_list(addr, &vn->busy.head);
}
/*
@@ -1135,7 +1506,7 @@ find_vmap_area_exceed_addr_lock(unsigned long addr, struct vmap_area **va)
for_each_vmap_node(vn) {
spin_lock(&vn->busy.lock);
- *va = __find_vmap_area_exceed_addr(addr, &vn->busy.root);
+ *va = find_vmap_area_exceed_addr_busy_locked(addr, vn);
if (*va)
if (!va_start_lowest || (*va)->va_start < va_start_lowest)
@@ -1152,7 +1523,7 @@ find_vmap_area_exceed_addr_lock(unsigned long addr, struct vmap_area **va)
vn = addr_to_node(va_start_lowest);
spin_lock(&vn->busy.lock);
- *va = __find_vmap_area(va_start_lowest, &vn->busy.root);
+ *va = find_vmap_area_busy_locked(va_start_lowest, vn);
if (*va)
return vn;
--
2.34.1
next prev parent reply other threads:[~2026-06-13 17:20 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-06-13 17:19 [PATCH RFC 00/12] mm/vmalloc: migrate vmap_area indexing from rb-tree to maple-tree Pranjal Arya
2026-06-13 17:19 ` Pranjal Arya [this message]
2026-06-13 17:19 ` [PATCH RFC 02/12] mm/vmalloc: convert allocation-side gap finding and insertion to maple_tree Pranjal Arya
2026-06-13 17:19 ` [PATCH RFC 03/12] mm/vmalloc: convert free, purge, and pcpu paths " Pranjal Arya
2026-06-13 17:19 ` [PATCH RFC 04/12] mm/vmalloc: finalize maple-only indexing and shrink struct vmap_area Pranjal Arya
2026-06-13 17:19 ` [PATCH RFC 05/12] mm/vmalloc: tighten failure handling under memory pressure Pranjal Arya
2026-06-13 17:19 ` [PATCH RFC 06/12] mm/vmalloc: tighten alloc/free hot paths Pranjal Arya
2026-06-13 17:19 ` [PATCH RFC 07/12] mm/vmalloc: consolidate occupied tree as authoritative index on hot path Pranjal Arya
2026-06-13 17:19 ` [PATCH RFC 08/12] mm/vmalloc: track lazy-purge queue as a list_head Pranjal Arya
2026-06-13 17:19 ` [PATCH RFC 09/12] mm/vmalloc: collapse busy-tree find-then-unlink into a single mas_erase Pranjal Arya
2026-06-13 17:19 ` [PATCH RFC 10/12] mm/vmalloc: per-CPU caching of free ranges from the maple_tree allocator Pranjal Arya
2026-06-13 17:19 ` [PATCH RFC 11/12] mm/vmalloc: O(1) lookup of cached vmap_areas with bounded fast-reject Pranjal Arya
2026-06-13 17:19 ` [PATCH RFC 12/12] mm/vmalloc: harden bump-allocator alloc/free against UBSAN array bounds Pranjal Arya
2026-06-13 23:15 ` [PATCH RFC 00/12] mm/vmalloc: migrate vmap_area indexing from rb-tree to maple-tree Matthew Wilcox
2026-06-14 6:35 ` [syzbot ci] " syzbot ci
2026-06-14 6:58 ` [PATCH RFC 00/12] " Uladzislau Rezki
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260613-vmalloc_maple-v1-1-0aa740bb944b@oss.qualcomm.com \
--to=pranjal.arya@oss.qualcomm.com \
--cc=Suzuki.Poulose@arm.com \
--cc=akpm@linux-foundation.org \
--cc=aliceryhl@google.com \
--cc=andrewjballance@gmail.com \
--cc=balbirs@nvidia.com \
--cc=dev.jain@arm.com \
--cc=dvyukov@google.com \
--cc=elver@google.com \
--cc=glider@google.com \
--cc=jackmanb@google.com \
--cc=liam@infradead.org \
--cc=linux-arm-msm@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=ljs@kernel.org \
--cc=maple-tree@lists.infradead.org \
--cc=neil.armstrong@linaro.org \
--cc=praan@google.com \
--cc=puranjay@kernel.org \
--cc=santosh.shukla@amd.com \
--cc=shuah@kernel.org \
--cc=smostafa@google.com \
--cc=sudeep.holla@kernel.org \
--cc=surenb@google.com \
--cc=urezki@gmail.com \
--cc=will@kernel.org \
--cc=wkarny@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.