Linux-mm Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: Pranjal Arya <pranjal.arya@oss.qualcomm.com>
To: Andrew Morton <akpm@linux-foundation.org>,
	Uladzislau Rezki <urezki@gmail.com>,
	"Liam R. Howlett" <liam@infradead.org>,
	Alice Ryhl <aliceryhl@google.com>,
	Andrew Ballance <andrewjballance@gmail.com>
Cc: linux-arm-msm@vger.kernel.org, linux-mm@kvack.org,
	linux-kernel@vger.kernel.org, maple-tree@lists.infradead.org,
	Lorenzo Stoakes <ljs@kernel.org>,
	Pranjal Shrivastava <praan@google.com>,
	Will Deacon <will@kernel.org>,
	Suzuki K Poulose <Suzuki.Poulose@arm.com>,
	Neil Armstrong <neil.armstrong@linaro.org>,
	Mostafa Saleh <smostafa@google.com>,
	Balbir Singh <balbirs@nvidia.com>,
	Suren Baghdasaryan <surenb@google.com>,
	Marco Elver <elver@google.com>,
	Dmitry Vyukov <dvyukov@google.com>,
	Alexander Potapenko <glider@google.com>,
	Shuah Khan <shuah@kernel.org>, Dev Jain <dev.jain@arm.com>,
	Brendan Jackman <jackmanb@google.com>,
	Puranjay Mohan <puranjay@kernel.org>,
	Santosh Shukla <santosh.shukla@amd.com>,
	Wyes Karny <wkarny@gmail.com>,
	Pranjal Arya <pranjal.arya@oss.qualcomm.com>,
	Sudeep Holla <sudeep.holla@kernel.org>
Subject: [PATCH RFC 04/12] mm/vmalloc: finalize maple-only indexing and shrink struct vmap_area
Date: Sat, 13 Jun 2026 22:49:46 +0530	[thread overview]
Message-ID: <20260613-vmalloc_maple-v1-4-0aa740bb944b@oss.qualcomm.com> (raw)
In-Reply-To: <20260613-vmalloc_maple-v1-0-0aa740bb944b@oss.qualcomm.com>

With every alloc/free/pcpu path now driven by maple_tree, retire the
augmented rb_tree from struct vmap_area and from include/linux/vmalloc.h.
The struct shrinks by 24 bytes on both x86_64 and arm64 (72 -> 48):

  -       struct rb_node rb_node;
  -       struct list_head list;
  -       union {
  -               unsigned long subtree_max_size;
  -               struct vm_struct *vm;
  -       };
  +       struct list_head list;
  +       struct vm_struct *vm;

Also allow maple_tree_init() to be called twice: vmalloc_init() runs
before start_kernel() reaches its own maple_tree_init() callsite,
and the maple_tree machinery needs to be live for the early seeding
done in vmap_init_free_space().  The second call becomes a no-op
once maple_node_cache is set.

Signed-off-by: Pranjal Arya <pranjal.arya@oss.qualcomm.com>
---
 include/linux/vmalloc.h |   16 +-
 lib/maple_tree.c        |    7 +
 mm/vmalloc.c            | 1123 ++++++++++++++++++-----------------------------
 3 files changed, 436 insertions(+), 710 deletions(-)

diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index d87dc7f77f4e..642bca92b804 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -9,7 +9,6 @@
 #include <linux/list.h>
 #include <linux/llist.h>
 #include <asm/page.h>		/* pgprot_t */
-#include <linux/rbtree.h>
 #include <linux/overflow.h>
 
 #include <asm/vmalloc.h>
@@ -72,19 +71,8 @@ struct vmap_area {
 	unsigned long va_start;
 	unsigned long va_end;
 
-	struct rb_node rb_node;         /* address sorted rbtree */
-	struct list_head list;          /* address sorted list */
-
-	/*
-	 * The following two variables can be packed, because
-	 * a vmap_area object can be either:
-	 *    1) in "free" tree (root is free_vmap_area_root)
-	 *    2) or "busy" tree (root is vmap_area_root)
-	 */
-	union {
-		unsigned long subtree_max_size; /* in "free" tree */
-		struct vm_struct *vm;           /* in "busy" tree */
-	};
+	struct list_head list;          /* auxiliary linkage (pool/purge/lazy) */
+	struct vm_struct *vm;
 	unsigned long flags; /* mark type of vm_map_ram area */
 };
 
diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index e52876435b77..f3474a107372 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -5634,6 +5634,13 @@ void __init maple_tree_init(void)
 		.sheaf_capacity = 32,
 	};
 
+	/*
+	 * vmalloc_init() may need Maple allocations before start_kernel() reaches
+	 * its own maple_tree_init() callsite. Keep initialization idempotent.
+	 */
+	if (maple_node_cache)
+		return;
+
 	maple_node_cache = kmem_cache_create("maple_node",
 			sizeof(struct maple_node), &args,
 			SLAB_PANIC);
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index f2117eafa9cf..c908c1a0fcd4 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -23,7 +23,6 @@
 #include <linux/kallsyms.h>
 #include <linux/list.h>
 #include <linux/notifier.h>
-#include <linux/rbtree.h>
 #include <linux/maple_tree.h>
 #include <linux/xarray.h>
 #include <linux/io.h>
@@ -36,7 +35,6 @@
 #include <linux/llist.h>
 #include <linux/uio.h>
 #include <linux/bitops.h>
-#include <linux/rbtree_augmented.h>
 #include <linux/overflow.h>
 #include <linux/pgtable.h>
 #include <linux/hugetlb.h>
@@ -881,22 +879,20 @@ static bool vmap_initialized __read_mostly;
 static struct kmem_cache *vmap_area_cachep;
 
 /*
- * This linked list stores free areas sorted by start address.
- * It gives O(1) access to neighbors for fast coalescing.
+ * Maple tree of free ranges.
  */
-static LIST_HEAD(free_vmap_area_list);
-/* Next-fit hint to avoid scanning from list head on each allocation. */
-static unsigned long free_vmap_alloc_hint __maybe_unused = 1;
-
-/*
- * Maple tree shadow of free_vmap_area_list. It is used for
- * address lookups and first-fit scans.
- */
-static struct rb_root free_vmap_area_root = RB_ROOT;
-static struct maple_tree free_vmap_area_mt __maybe_unused =
-	MTREE_INIT_EXT(free_vmap_area_mt, MT_FLAGS_LOCK_EXTERN, free_vmap_area_lock);
-static bool free_vmap_area_mt_enabled __maybe_unused;
-static bool free_vmap_area_mt_init_tried __maybe_unused;
+static struct maple_tree free_vmap_area_mt =
+	MTREE_INIT_EXT(free_vmap_area_mt,
+		       MT_FLAGS_LOCK_EXTERN | MT_FLAGS_ALLOC_RANGE,
+		       free_vmap_area_lock);
+static bool free_vmap_area_mt_enabled;
+static bool free_vmap_area_mt_init_tried;
+static struct maple_tree occupied_vmap_area_mt =
+	MTREE_INIT_EXT(occupied_vmap_area_mt,
+		       MT_FLAGS_LOCK_EXTERN | MT_FLAGS_ALLOC_RANGE,
+		       free_vmap_area_lock);
+static bool occupied_vmap_area_mt_enabled;
+static bool occupied_vmap_area_mt_init_tried;
 
 /*
  * Preload a CPU with one object for "no edge" split case. The
@@ -905,19 +901,11 @@ static bool free_vmap_area_mt_init_tried __maybe_unused;
  */
 static DEFINE_PER_CPU(struct vmap_area *, ne_fit_preload_node);
 
-/*
- * This structure defines a single, solid model where a list and
- * maple tree are part of one entity protected by the lock. Nodes are
- * sorted in ascending order, thus for O(1) access to left/right
- * neighbors a list is used as well as for sequential traversal.
- */
+/* Per-node ordered range index backed by Maple Tree. */
 struct mt_list {
-	struct rb_root root;
 	struct maple_tree mt;
-	struct list_head head;
 	spinlock_t lock;
 	bool mt_enabled;
-	bool mt_init_tried;
 };
 
 /*
@@ -1055,22 +1043,6 @@ va_size(struct vmap_area *va)
 	return (va->va_end - va->va_start);
 }
 
-/*
- * Transitional rb compatibility retained until all rb-only users are moved.
- * Follow-up patches in this RFC series remove these helpers.
- */
-static __always_inline unsigned long
-get_subtree_max_size(struct rb_node *node)
-{
-	struct vmap_area *va;
-
-	va = rb_entry_safe(node, struct vmap_area, rb_node);
-	return va ? va->subtree_max_size : 0;
-}
-
-RB_DECLARE_CALLBACKS_MAX(static, free_vmap_area_rb_augment_cb,
-	struct vmap_area, rb_node, unsigned long, subtree_max_size, va_size)
-
 static void reclaim_and_purge_vmap_areas(void);
 static BLOCKING_NOTIFIER_HEAD(vmap_notify_list);
 static void drain_vmap_area_work(struct work_struct *work);
@@ -1078,31 +1050,12 @@ static DECLARE_WORK(drain_vmap_work, drain_vmap_area_work);
 
 static __cacheline_aligned_in_smp atomic_long_t vmap_lazy_nr;
 
-/*
- * maple nodes are allocated from slab; defer tree population until
- * slab allocator is up to avoid early-boot failures.
- */
-static __always_inline bool vmap_mt_runtime_ready(void)
-{
-	return READ_ONCE(vmap_initialized) && slab_is_available();
-}
-
 static __always_inline bool free_mt_supported(void)
 {
 	return free_vmap_area_mt_enabled;
 }
 
-static __always_inline void disable_free_mt_locked(void)
-{
-	lockdep_assert_held(&free_vmap_area_lock);
-
-	if (free_vmap_area_mt_enabled) {
-		__mt_destroy(&free_vmap_area_mt);
-		free_vmap_area_mt_enabled = false;
-	}
-}
-
-static __always_inline void free_mt_store_va_locked(struct vmap_area *va)
+static __always_inline bool free_mt_store_va_locked(struct vmap_area *va)
 {
 	int err;
 
@@ -1110,12 +1063,20 @@ static __always_inline void free_mt_store_va_locked(struct vmap_area *va)
 
 	MA_STATE(mas, &free_vmap_area_mt, va->va_start, va->va_end - 1);
 
-	err = mas_store_gfp(&mas, va, GFP_ATOMIC | __GFP_NOWARN);
-	if (WARN_ON_ONCE(err))
-		disable_free_mt_locked();
+	err = mas_preallocate(&mas, va, GFP_NOWAIT | __GFP_NOWARN);
+	if (!err) {
+		mas_store_prealloc(&mas, va);
+		mas_destroy(&mas);
+	} else {
+		err = mas_store_gfp(&mas, va, GFP_ATOMIC | __GFP_NOWARN);
+		if (WARN_ON_ONCE(err))
+			return false;
+	}
+
+	return true;
 }
 
-static __always_inline void free_mt_erase_va_locked(struct vmap_area *va)
+static __always_inline bool free_mt_erase_va_locked(struct vmap_area *va)
 {
 	int err;
 
@@ -1125,10 +1086,12 @@ static __always_inline void free_mt_erase_va_locked(struct vmap_area *va)
 
 	err = mas_store_gfp(&mas, NULL, GFP_ATOMIC | __GFP_NOWARN);
 	if (WARN_ON_ONCE(err))
-		disable_free_mt_locked();
+		return false;
+
+	return true;
 }
 
-static __always_inline void
+static __always_inline bool
 free_mt_update_va_locked(struct vmap_area *va, unsigned long old_start,
 			 unsigned long old_end)
 {
@@ -1140,35 +1103,14 @@ free_mt_update_va_locked(struct vmap_area *va, unsigned long old_start,
 	MA_STATE(mas_store, &free_vmap_area_mt, va->va_start, va->va_end - 1);
 
 	err = mas_store_gfp(&mas_erase, NULL, GFP_ATOMIC | __GFP_NOWARN);
-	if (WARN_ON_ONCE(err)) {
-		disable_free_mt_locked();
-		return;
-	}
+	if (WARN_ON_ONCE(err))
+		return false;
 
 	err = mas_store_gfp(&mas_store, va, GFP_ATOMIC | __GFP_NOWARN);
 	if (WARN_ON_ONCE(err))
-		disable_free_mt_locked();
-}
-
-static void free_mt_rebuild_locked(void)
-{
-	struct vmap_area *va;
-	int err;
-
-	lockdep_assert_held(&free_vmap_area_lock);
-
-	__mt_destroy(&free_vmap_area_mt);
-	free_vmap_area_mt_enabled = true;
-
-	list_for_each_entry(va, &free_vmap_area_list, list) {
-		MA_STATE(mas, &free_vmap_area_mt, va->va_start, va->va_end - 1);
+		return false;
 
-		err = mas_store_gfp(&mas, va, GFP_ATOMIC | __GFP_NOWARN);
-		if (WARN_ON_ONCE(err)) {
-			disable_free_mt_locked();
-			return;
-		}
-	}
+	return true;
 }
 
 static __always_inline void try_init_free_mt_locked(void)
@@ -1178,257 +1120,101 @@ static __always_inline void try_init_free_mt_locked(void)
 	if (free_vmap_area_mt_init_tried)
 		return;
 
-	if (!vmap_mt_runtime_ready())
+	if (!slab_is_available())
 		return;
 
 	free_vmap_area_mt_init_tried = true;
-	free_mt_rebuild_locked();
-}
-
-static __always_inline struct vmap_area *
-__find_vmap_area_list(unsigned long addr, struct list_head *head)
-{
-	struct vmap_area *va;
-
-	addr = (unsigned long)kasan_reset_tag((void *)addr);
-
-	list_for_each_entry(va, head, list) {
-		if (addr < va->va_start)
-			break;
-		if (addr < va->va_end)
-			return va;
-	}
-
-	return NULL;
-}
-
-static struct vmap_area *__find_vmap_area(unsigned long addr, struct rb_root *root)
-{
-	struct rb_node *n = root->rb_node;
-
-	addr = (unsigned long)kasan_reset_tag((void *)addr);
-
-	while (n) {
-		struct vmap_area *va;
-
-		va = rb_entry(n, struct vmap_area, rb_node);
-		if (addr < va->va_start)
-			n = n->rb_left;
-		else if (addr >= va->va_end)
-			n = n->rb_right;
-		else
-			return va;
-	}
-
-	return NULL;
+	free_vmap_area_mt_enabled = true;
 }
 
-/* Look up the first VA which satisfies addr < va_end, NULL if none. */
-static __always_inline struct vmap_area *
-__find_vmap_area_exceed_addr_list(unsigned long addr, struct list_head *head)
+static __always_inline bool occupied_mt_supported(void)
 {
-	struct vmap_area *va;
-
-	addr = (unsigned long)kasan_reset_tag((void *)addr);
-
-	list_for_each_entry(va, head, list) {
-		if (va->va_end > addr)
-			return va;
-	}
-
-	return NULL;
+	return occupied_vmap_area_mt_enabled;
 }
 
-static __always_inline struct list_head *
-find_vmap_area_insert_point_list(struct vmap_area *va, struct list_head *head)
+static __always_inline void try_init_occupied_mt_locked(void)
 {
-	struct vmap_area *tmp;
-	struct list_head *next = head;
-
-	list_for_each_entry(tmp, head, list) {
-		if (tmp->va_start > va->va_start) {
-			next = &tmp->list;
-			break;
-		}
-	}
+	lockdep_assert_held(&free_vmap_area_lock);
 
-	if (next != head) {
-		tmp = list_entry(next, struct vmap_area, list);
-		if (WARN_ON_ONCE(va->va_end > tmp->va_start))
-			return NULL;
-	}
+	if (occupied_vmap_area_mt_init_tried)
+		return;
 
-	if (next->prev != head) {
-		tmp = list_entry(next->prev, struct vmap_area, list);
-		if (WARN_ON_ONCE(va->va_start < tmp->va_end))
-			return NULL;
-	}
+	if (!slab_is_available())
+		return;
 
-	return next;
+	occupied_vmap_area_mt_init_tried = true;
+	occupied_vmap_area_mt_enabled = true;
 }
 
-/*
- * Use maple-tree neighbour lookup to locate insertion point in O(log n),
- * while preserving sorted-list neighbour traversal.
- */
-static __always_inline struct list_head *
-find_vmap_area_insert_point_mt(struct vmap_area *va, struct maple_tree *tree,
-			       struct list_head *head)
+static __always_inline bool
+occupied_mt_store_range_locked(unsigned long start, unsigned long end)
 {
-	struct vmap_area *prev, *next;
-	struct list_head *next_link;
+	int err;
 
-	MA_STATE(mas, tree, va->va_start, va->va_start);
+	lockdep_assert_held(&free_vmap_area_lock);
 
-	mas_set(&mas, va->va_start);
-	next = mas_find(&mas, ULONG_MAX);
+	if (WARN_ON_ONCE(!occupied_mt_supported()))
+		return false;
 
-	if (next) {
-		if (WARN_ON_ONCE(next->va_start <= va->va_start))
-			return NULL;
-		if (WARN_ON_ONCE(va->va_end > next->va_start))
-			return NULL;
-		next_link = &next->list;
-	} else {
-		next_link = head;
-	}
+	MA_STATE(mas, &occupied_vmap_area_mt, start, end - 1);
 
-	if (next_link->prev != head) {
-		prev = list_entry(next_link->prev, struct vmap_area, list);
-		if (WARN_ON_ONCE(va->va_start < prev->va_end))
-			return NULL;
+	err = mas_preallocate(&mas, XA_ZERO_ENTRY, GFP_NOWAIT | __GFP_NOWARN);
+	if (!err) {
+		mas_store_prealloc(&mas, XA_ZERO_ENTRY);
+		mas_destroy(&mas);
+		return true;
 	}
 
-	return next_link;
+	err = mas_store_gfp(&mas, XA_ZERO_ENTRY, GFP_ATOMIC | __GFP_NOWARN);
+	return !WARN_ON_ONCE(err);
 }
 
 static __always_inline bool
-insert_vmap_area_list_sorted(struct vmap_area *va, struct list_head *head)
+occupied_mt_erase_range_locked(unsigned long start, unsigned long end)
 {
-	struct list_head *next;
-
-	next = find_vmap_area_insert_point_list(va, head);
-	if (!next)
-		return false;
-
-	list_add_tail(&va->list, next);
-	return true;
-}
+	int err;
 
-static __always_inline bool
-insert_vmap_area_list_sorted_mt(struct vmap_area *va, struct maple_tree *tree,
-				struct list_head *head)
-{
-	struct list_head *next;
+	lockdep_assert_held(&free_vmap_area_lock);
 
-	next = find_vmap_area_insert_point_mt(va, tree, head);
-	if (!next)
+	if (WARN_ON_ONCE(!occupied_mt_supported()))
 		return false;
 
-	list_add_tail(&va->list, next);
-	return true;
-}
+	MA_STATE(mas, &occupied_vmap_area_mt, start, end - 1);
 
-static __always_inline void
-disable_busy_mt_locked(struct vmap_node *vn)
-{
-	lockdep_assert_held(&vn->busy.lock);
-
-	if (vn->busy.mt_enabled) {
-		__mt_destroy(&vn->busy.mt);
-		vn->busy.mt_enabled = false;
-	}
-
-	vn->busy.mt_init_tried = true;
-}
-
-static __always_inline void
-disable_lazy_mt_locked(struct vmap_node *vn)
-{
-	lockdep_assert_held(&vn->lazy.lock);
-
-	if (vn->lazy.mt_enabled) {
-		__mt_destroy(&vn->lazy.mt);
-		vn->lazy.mt_enabled = false;
-	}
-
-	vn->lazy.mt_init_tried = true;
+	err = mas_store_gfp(&mas, NULL, GFP_ATOMIC | __GFP_NOWARN);
+	return !WARN_ON_ONCE(err);
 }
 
-static void
-busy_mt_rebuild_locked(struct vmap_node *vn)
+static __always_inline bool
+occupied_mt_erase_va_locked(struct vmap_area *va)
 {
-	struct vmap_area *va;
-	int err;
-
-	lockdep_assert_held(&vn->busy.lock);
-
-	__mt_destroy(&vn->busy.mt);
-	vn->busy.mt_enabled = true;
+	lockdep_assert_held(&free_vmap_area_lock);
 
-	list_for_each_entry(va, &vn->busy.head, list) {
-		MA_STATE(mas, &vn->busy.mt, va->va_start, va->va_end - 1);
+	if (!occupied_mt_supported())
+		return true;
 
-		err = mas_store_gfp(&mas, va, GFP_ATOMIC | __GFP_NOWARN);
-		if (WARN_ON_ONCE(err)) {
-			disable_busy_mt_locked(vn);
-			return;
-		}
-	}
+	return occupied_mt_erase_range_locked(va->va_start, va->va_end);
 }
 
 static __always_inline void
 try_init_busy_mt_locked(struct vmap_node *vn)
 {
 	lockdep_assert_held(&vn->busy.lock);
-
-	if (vn->busy.mt_init_tried)
-		return;
-
-	if (!vmap_mt_runtime_ready())
-		return;
-
-	vn->busy.mt_init_tried = true;
-	busy_mt_rebuild_locked(vn);
-}
-
-static void
-lazy_mt_rebuild_locked(struct vmap_node *vn)
-{
-	struct vmap_area *va;
-	int err;
-
-	lockdep_assert_held(&vn->lazy.lock);
-
-	__mt_destroy(&vn->lazy.mt);
-	vn->lazy.mt_enabled = true;
-
-	list_for_each_entry(va, &vn->lazy.head, list) {
-		MA_STATE(mas, &vn->lazy.mt, va->va_start, va->va_end - 1);
-
-		err = mas_store_gfp(&mas, va, GFP_ATOMIC | __GFP_NOWARN);
-		if (WARN_ON_ONCE(err)) {
-			disable_lazy_mt_locked(vn);
-			return;
-		}
-	}
+	WARN_ON_ONCE(!vn->busy.mt_enabled);
 }
 
 static __always_inline void
 try_init_lazy_mt_locked(struct vmap_node *vn)
 {
 	lockdep_assert_held(&vn->lazy.lock);
-
-	if (vn->lazy.mt_init_tried)
-		return;
-
-	if (!vmap_mt_runtime_ready())
-		return;
-
-	vn->lazy.mt_init_tried = true;
-	lazy_mt_rebuild_locked(vn);
+	WARN_ON_ONCE(!vn->lazy.mt_enabled);
 }
 
+/*
+ * Busy/lazy lookup paths remain lock-based to preserve pointer lifetime
+ * semantics.
+ */
+
 static __always_inline struct vmap_area *
 __find_vmap_area_mt(unsigned long addr, struct maple_tree *tree)
 {
@@ -1462,6 +1248,24 @@ __find_vmap_area_enclose_addr_mt(unsigned long addr, struct maple_tree *tree)
 	return mas_find_rev(&mas, 0);
 }
 
+static __always_inline bool
+validate_vmap_area_range_insert_mt_locked(struct maple_tree *tree,
+					  unsigned long start,
+					  unsigned long end)
+{
+	struct vmap_area *left, *right;
+
+	left = __find_vmap_area_enclose_addr_mt(start, tree);
+	if (left && WARN_ON_ONCE(left->va_end > start))
+		return false;
+
+	right = __find_vmap_area_exceed_addr_mt(start, tree);
+	if (right && WARN_ON_ONCE(right->va_start < end))
+		return false;
+
+	return true;
+}
+
 static __always_inline struct vmap_area *
 find_vmap_area_busy_locked(unsigned long addr, struct vmap_node *vn)
 {
@@ -1469,10 +1273,10 @@ find_vmap_area_busy_locked(unsigned long addr, struct vmap_node *vn)
 
 	try_init_busy_mt_locked(vn);
 
-	if (likely(vn->busy.mt_enabled))
-		return __find_vmap_area_mt(addr, &vn->busy.mt);
+	if (WARN_ON_ONCE(!vn->busy.mt_enabled))
+		return NULL;
 
-	return __find_vmap_area_list(addr, &vn->busy.head);
+	return __find_vmap_area_mt(addr, &vn->busy.mt);
 }
 
 static __always_inline struct vmap_area *
@@ -1482,10 +1286,10 @@ find_vmap_area_exceed_addr_busy_locked(unsigned long addr, struct vmap_node *vn)
 
 	try_init_busy_mt_locked(vn);
 
-	if (likely(vn->busy.mt_enabled))
-		return __find_vmap_area_exceed_addr_mt(addr, &vn->busy.mt);
+	if (WARN_ON_ONCE(!vn->busy.mt_enabled))
+		return NULL;
 
-	return __find_vmap_area_exceed_addr_list(addr, &vn->busy.head);
+	return __find_vmap_area_exceed_addr_mt(addr, &vn->busy.mt);
 }
 
 /*
@@ -1544,22 +1348,27 @@ insert_vmap_area_busy_locked(struct vmap_area *va, struct vmap_node *vn)
 
 	try_init_busy_mt_locked(vn);
 
-	if (likely(vn->busy.mt_enabled)) {
-		MA_STATE(mas, &vn->busy.mt, va->va_start, va->va_end - 1);
+	if (WARN_ON_ONCE(!vn->busy.mt_enabled))
+		return;
 
-		if (!insert_vmap_area_list_sorted_mt(va, &vn->busy.mt,
-						     &vn->busy.head))
-			return;
+	if (!validate_vmap_area_range_insert_mt_locked(&vn->busy.mt,
+						       va->va_start,
+						       va->va_end))
+		return;
 
-		err = mas_store_gfp(&mas, va, GFP_ATOMIC | __GFP_NOWARN);
-		if (WARN_ON_ONCE(err))
-			disable_busy_mt_locked(vn);
+	INIT_LIST_HEAD(&va->list);
+
+	MA_STATE(mas, &vn->busy.mt, va->va_start, va->va_end - 1);
 
+	err = mas_preallocate(&mas, va, GFP_NOWAIT | __GFP_NOWARN);
+	if (!err) {
+		mas_store_prealloc(&mas, va);
+		mas_destroy(&mas);
 		return;
 	}
 
-	if (!insert_vmap_area_list_sorted(va, &vn->busy.head))
-		return;
+	err = mas_store_gfp(&mas, va, GFP_ATOMIC | __GFP_NOWARN);
+	WARN_ON_ONCE(err);
 }
 
 static __always_inline void
@@ -1569,18 +1378,17 @@ unlink_vmap_area_busy_locked(struct vmap_area *va, struct vmap_node *vn)
 
 	lockdep_assert_held(&vn->busy.lock);
 
-	MA_STATE(mas, &vn->busy.mt, va->va_start, va->va_end - 1);
-
-	list_del_init(&va->list);
-
 	try_init_busy_mt_locked(vn);
 
-	if (unlikely(!vn->busy.mt_enabled))
+	if (WARN_ON_ONCE(!vn->busy.mt_enabled))
 		return;
 
+	MA_STATE(mas, &vn->busy.mt, va->va_start, va->va_end - 1);
+
 	err = mas_store_gfp(&mas, NULL, GFP_ATOMIC | __GFP_NOWARN);
-	if (WARN_ON_ONCE(err))
-		disable_busy_mt_locked(vn);
+	WARN_ON_ONCE(err);
+
+	INIT_LIST_HEAD(&va->list);
 }
 
 static __always_inline void
@@ -1591,23 +1399,27 @@ insert_vmap_area_lazy_locked(struct vmap_area *va, struct vmap_node *vn)
 	lockdep_assert_held(&vn->lazy.lock);
 
 	try_init_lazy_mt_locked(vn);
+	if (WARN_ON_ONCE(!vn->lazy.mt_enabled))
+		return;
 
-	if (likely(vn->lazy.mt_enabled)) {
-		MA_STATE(mas, &vn->lazy.mt, va->va_start, va->va_end - 1);
+	if (!validate_vmap_area_range_insert_mt_locked(&vn->lazy.mt,
+						       va->va_start,
+						       va->va_end))
+		return;
 
-		if (!insert_vmap_area_list_sorted_mt(va, &vn->lazy.mt,
-						     &vn->lazy.head))
-			return;
+	INIT_LIST_HEAD(&va->list);
 
-		err = mas_store_gfp(&mas, va, GFP_ATOMIC | __GFP_NOWARN);
-		if (WARN_ON_ONCE(err))
-			disable_lazy_mt_locked(vn);
+	MA_STATE(mas, &vn->lazy.mt, va->va_start, va->va_end - 1);
 
+	err = mas_preallocate(&mas, va, GFP_NOWAIT | __GFP_NOWARN);
+	if (!err) {
+		mas_store_prealloc(&mas, va);
+		mas_destroy(&mas);
 		return;
 	}
 
-	if (!insert_vmap_area_list_sorted(va, &vn->lazy.head))
-		return;
+	err = mas_store_gfp(&mas, va, GFP_ATOMIC | __GFP_NOWARN);
+	WARN_ON_ONCE(err);
 }
 
 static __always_inline bool
@@ -1616,60 +1428,56 @@ lazy_vmap_areas_empty_locked(struct vmap_node *vn)
 	lockdep_assert_held(&vn->lazy.lock);
 
 	try_init_lazy_mt_locked(vn);
+	if (WARN_ON_ONCE(!vn->lazy.mt_enabled))
+		return true;
 
-	if (likely(vn->lazy.mt_enabled))
-		return mtree_empty(&vn->lazy.mt);
-
-	return list_empty(&vn->lazy.head);
+	return mtree_empty(&vn->lazy.mt);
 }
 
 static __always_inline void
 move_lazy_vmap_areas_to_purge_locked(struct vmap_node *vn)
 {
 	struct vmap_area *va;
-	int err;
 
 	lockdep_assert_held(&vn->lazy.lock);
 
 	try_init_lazy_mt_locked(vn);
+	if (WARN_ON_ONCE(!vn->lazy.mt_enabled))
+		return;
 
-	if (likely(vn->lazy.mt_enabled)) {
-		list_for_each_entry(va, &vn->lazy.head, list) {
-			MA_STATE(mas, &vn->lazy.mt, va->va_start, va->va_end - 1);
-
-			err = mas_store_gfp(&mas, NULL, GFP_ATOMIC | __GFP_NOWARN);
-			if (WARN_ON_ONCE(err)) {
-				disable_lazy_mt_locked(vn);
-				break;
-			}
-		}
+	MA_STATE(mas, &vn->lazy.mt, 0, 0);
 
-		if (vn->lazy.mt_enabled && WARN_ON_ONCE(!mtree_empty(&vn->lazy.mt)))
-			disable_lazy_mt_locked(vn);
-	}
+	mas_for_each(&mas, va, ULONG_MAX)
+		list_add_tail(&va->list, &vn->purge_list);
 
-	list_replace_init(&vn->lazy.head, &vn->purge_list);
+	__mt_destroy(&vn->lazy.mt);
+	mt_init_flags(&vn->lazy.mt, MT_FLAGS_LOCK_EXTERN);
+	mt_set_external_lock(&vn->lazy.mt, &vn->lazy.lock);
+	vn->lazy.mt_enabled = true;
 }
 
 static __always_inline bool
 insert_vmap_area_free_locked(struct vmap_area *va)
 {
+	struct vmap_area *prev, *next;
+
 	lockdep_assert_held(&free_vmap_area_lock);
 
 	try_init_free_mt_locked();
 
-	if (likely(free_mt_supported())) {
-		if (!insert_vmap_area_list_sorted_mt(va, &free_vmap_area_mt,
-						     &free_vmap_area_list))
-			return false;
+	if (unlikely(!free_mt_supported()))
+		return false;
 
-		free_mt_store_va_locked(va);
-	} else {
-		if (!insert_vmap_area_list_sorted(va, &free_vmap_area_list))
-			return false;
-	}
+	prev = __find_vmap_area_enclose_addr_mt(va->va_start, &free_vmap_area_mt);
+	if (prev && WARN_ON_ONCE(prev->va_end > va->va_start))
+		return false;
 
-	return true;
+	next = __find_vmap_area_exceed_addr_mt(va->va_start, &free_vmap_area_mt);
+	if (next && WARN_ON_ONCE(next->va_start < va->va_end))
+		return false;
+
+	INIT_LIST_HEAD(&va->list);
+	return free_mt_store_va_locked(va);
 }
 
 static __always_inline void
@@ -1677,193 +1485,56 @@ unlink_vmap_area_free_locked(struct vmap_area *va)
 {
 	lockdep_assert_held(&free_vmap_area_lock);
 
-	if (WARN_ON_ONCE(list_empty(&va->list)))
+	if (unlikely(!free_mt_supported()))
 		return;
 
-	if (likely(free_mt_supported()))
-		free_mt_erase_va_locked(va);
-
-	list_del_init(&va->list);
-}
-
-/*
- * Merge de-allocated chunk of VA memory with previous
- * and next free blocks. If coalesce is not done a new
- * free area is inserted. If VA has been merged, it is
- * freed.
- *
- * Please note, it can return NULL in case of overlap
- * ranges, followed by WARN() report. Despite it is a
- * buggy behaviour, a system can be alive and keep
- * ongoing.
- */
-static __always_inline struct vmap_area *
-__merge_or_add_vmap_area(struct vmap_area *va, struct list_head *head, bool update_mt)
-{
-	struct vmap_area *sibling;
-	struct list_head *next;
-	unsigned long old_start, old_end;
-	bool merged = false;
-
-	if (update_mt && free_mt_supported())
-		next = find_vmap_area_insert_point_mt(va, &free_vmap_area_mt, head);
-	else
-		next = find_vmap_area_insert_point_list(va, head);
-
-	if (!next)
-		return NULL;
-
-	/*
-	 * start            end
-	 * |                |
-	 * |<------VA------>|<-----Next----->|
-	 *                  |                |
-	 *                  start            end
-	 */
-	if (next != head) {
-		sibling = list_entry(next, struct vmap_area, list);
-		if (sibling->va_start == va->va_end) {
-			old_start = sibling->va_start;
-			old_end = sibling->va_end;
-			sibling->va_start = va->va_start;
-			if (update_mt && free_mt_supported())
-				free_mt_update_va_locked(sibling, old_start, old_end);
-
-			/* Free vmap_area object. */
-			kmem_cache_free(vmap_area_cachep, va);
-
-			/* Point to the new merged area. */
-			va = sibling;
-			merged = true;
-		}
-	}
-
-	/*
-	 * start            end
-	 * |                |
-	 * |<-----Prev----->|<------VA------>|
-	 *                  |                |
-	 *                  start            end
-	 */
-	if (next->prev != head) {
-		sibling = list_entry(next->prev, struct vmap_area, list);
-		if (sibling->va_end == va->va_start) {
-			/*
-			 * If both neighbors are coalesced, it is important
-			 * to unlink the "next" node first, followed by merging
-			 * with "previous" one.
-			 */
-			if (merged) {
-				if (update_mt)
-					unlink_vmap_area_free_locked(va);
-				else
-					list_del_init(&va->list);
-			}
-
-			old_start = sibling->va_start;
-			old_end = sibling->va_end;
-			sibling->va_end = va->va_end;
-			if (update_mt && free_mt_supported())
-				free_mt_update_va_locked(sibling, old_start, old_end);
-
-			/* Free vmap_area object. */
-			kmem_cache_free(vmap_area_cachep, va);
-
-			/* Point to the new merged area. */
-			va = sibling;
-			merged = true;
-		}
-	}
-
-	if (!merged) {
-		if (update_mt)
-			insert_vmap_area_free_locked(va);
-		else
-			list_add_tail(&va->list, next);
-	}
+	if (!free_mt_erase_va_locked(va))
+		return;
 
-	return va;
-}
-
-static __always_inline struct vmap_area *
-merge_or_add_vmap_area(struct vmap_area *va,
-	struct list_head *head)
-{
-	return __merge_or_add_vmap_area(va, head, false);
+	INIT_LIST_HEAD(&va->list);
 }
 
 static __always_inline struct vmap_area *
 merge_or_add_vmap_area_free_locked(struct vmap_area *va)
 {
+	struct vmap_area *left, *right;
+	unsigned long new_start, new_end;
+
 	lockdep_assert_held(&free_vmap_area_lock);
 
-	va = __merge_or_add_vmap_area(va, &free_vmap_area_list, true);
-	if (va && va->va_start < free_vmap_alloc_hint)
-		free_vmap_alloc_hint = va->va_start;
+	if (unlikely(!free_mt_supported()))
+		return NULL;
 
-	return va;
-}
+	new_start = va->va_start;
+	new_end = va->va_end;
 
-/*
- * Transitional wrappers retained until all legacy rb call sites are switched.
- * Follow-up patches in this series remove these wrappers.
- */
-static __always_inline void
-insert_vmap_area(struct vmap_area *va, struct rb_root *root,
-		 struct list_head *head)
-{
-	struct vmap_node *vn = addr_to_node(va->va_start);
+	left = __find_vmap_area_enclose_addr_mt(new_start, &free_vmap_area_mt);
+	if (left && WARN_ON_ONCE(left->va_end > new_start))
+		return NULL;
 
-	if (head == &free_vmap_area_list) {
-		insert_vmap_area_free_locked(va);
-		return;
+	if (left && left->va_end == new_start) {
+		new_start = left->va_start;
+		unlink_vmap_area_free_locked(left);
+		kmem_cache_free(vmap_area_cachep, left);
 	}
 
-	if (head == &vn->lazy.head) {
-		insert_vmap_area_lazy_locked(va, vn);
-		return;
-	}
-
-	insert_vmap_area_busy_locked(va, vn);
-}
-
-static __always_inline void
-insert_vmap_area_augment(struct vmap_area *va, struct rb_node *from,
-			 struct rb_root *root, struct list_head *head)
-{
-	insert_vmap_area(va, root, head);
-}
-
-static __always_inline void unlink_va(struct vmap_area *va, struct rb_root *root)
-{
-	struct vmap_node *vn = addr_to_node(va->va_start);
+	right = __find_vmap_area_exceed_addr_mt(new_start, &free_vmap_area_mt);
+	if (right && WARN_ON_ONCE(right->va_start < new_end))
+		return NULL;
 
-	if (root == &free_vmap_area_root) {
-		unlink_vmap_area_free_locked(va);
-		return;
+	if (right && right->va_start == new_end) {
+		new_end = right->va_end;
+		unlink_vmap_area_free_locked(right);
+		kmem_cache_free(vmap_area_cachep, right);
 	}
 
-	unlink_vmap_area_busy_locked(va, vn);
-}
+	va->va_start = new_start;
+	va->va_end = new_end;
 
-static __always_inline void
-unlink_va_augment(struct vmap_area *va, struct rb_root *root)
-{
-	unlink_va(va, root);
-}
-
-static __always_inline void augment_tree_propagate_from(struct vmap_area *va)
-{
-}
-
-static __always_inline struct vmap_area *
-merge_or_add_vmap_area_augment(struct vmap_area *va, struct rb_root *root,
-			       struct list_head *head)
-{
-	if (head == &free_vmap_area_list)
-		return merge_or_add_vmap_area_free_locked(va);
+	if (!insert_vmap_area_free_locked(va))
+		return NULL;
 
-	return merge_or_add_vmap_area(va, head);
+	return va;
 }
 
 static __always_inline bool
@@ -1885,86 +1556,57 @@ is_within_this_va(struct vmap_area *va, unsigned long size,
 	return (nva_start_addr + size <= va->va_end);
 }
 
-static __always_inline struct vmap_area *
-find_vmap_lowest_match_list(struct list_head *head, unsigned long size,
-			    unsigned long align, unsigned long vstart)
+static __always_inline bool
+occupied_mt_find_hole_window_locked(unsigned long min, unsigned long max,
+				    unsigned long size, unsigned long align,
+				    unsigned long *addr)
 {
-	struct vmap_area *va;
+	MA_STATE(mas, &occupied_vmap_area_mt, 0, 0);
+	unsigned long search = min;
+	unsigned long hole_end;
 
-	list_for_each_entry(va, head, list) {
-		if (!is_within_this_va(va, size, align, vstart))
-			continue;
+	while (search <= max) {
+		unsigned long candidate, candidate_end;
 
-		return va;
-	}
+		mas_set(&mas, search);
+		if (mas_empty_area(&mas, search, max, size))
+			return false;
 
-	return NULL;
-}
+		hole_end = min(mas.last, max);
+		candidate = ALIGN(mas.index, align);
+		if (candidate < mas.index)
+			return false;
 
-static __always_inline unsigned long
-clamp_vmap_alloc_hint(unsigned long hint, unsigned long vstart,
-		      unsigned long vend)
-{
-	if (hint < vstart || hint >= vend)
-		return vstart;
+		if (check_add_overflow(candidate, size - 1, &candidate_end))
+			return false;
 
-	return hint;
-}
+		if (candidate >= search && candidate_end <= hole_end) {
+			*addr = candidate;
+			return true;
+		}
 
-/*
- * Next-fit scan with wrap-around. Use maple to jump to the first candidate
- * around the hint in O(log n), then continue on the ordered list for cheap
- * neighbour traversal and deterministic coalescing behaviour.
- */
-static __always_inline struct vmap_area *
-find_vmap_match_list_next_fit(struct list_head *head, unsigned long size,
-			      unsigned long align, unsigned long vstart,
-			      unsigned long vend)
-{
-	struct vmap_area *va, *start = NULL;
-	unsigned long hint;
-	bool wrapped;
-
-	hint = clamp_vmap_alloc_hint(free_vmap_alloc_hint, vstart, vend);
-
-	if (hint != vstart) {
-		if (free_mt_supported())
-			start = __find_vmap_area_exceed_addr_mt(hint,
-								&free_vmap_area_mt);
-
-		if (start) {
-			va = start;
-			list_for_each_entry_from(va, head, list) {
-				if (is_within_this_va(va, size, align, hint))
-					return va;
-			}
-		} else {
-			list_for_each_entry(va, head, list) {
-				if (va->va_end <= hint)
-					continue;
+		if (hole_end == ULONG_MAX)
+			return false;
 
-				if (is_within_this_va(va, size, align, hint))
-					return va;
-			}
-		}
+		search = hole_end + 1;
 	}
 
-	wrapped = (hint != vstart);
-	list_for_each_entry(va, head, list) {
-		if (wrapped) {
-			if (start && va == start)
-				break;
-			if (!start && va->va_start >= hint)
-				break;
-		}
+	return false;
+}
 
-		if (is_within_this_va(va, size, align, vstart))
-			return va;
-	}
+static __always_inline unsigned long
+occupied_mt_find_hole_lowest_locked(unsigned long size, unsigned long align,
+				    unsigned long vstart, unsigned long vend)
+{
+	unsigned long addr;
 
-	return NULL;
+	if (occupied_mt_find_hole_window_locked(vstart, vend - 1, size, align, &addr))
+		return addr;
+
+	return -ENOENT;
 }
 
+/* Lowest-match scan directly on maple ordered traversal. */
 static __always_inline struct vmap_area *
 find_vmap_lowest_match_mt(struct maple_tree *tree, unsigned long size,
 			  unsigned long align, unsigned long vstart)
@@ -1989,24 +1631,26 @@ find_vmap_lowest_match_mt(struct maple_tree *tree, unsigned long size,
 #include <linux/random.h>
 
 static struct vmap_area *
-find_vmap_lowest_linear_match(struct list_head *head, unsigned long size,
-	unsigned long align, unsigned long vstart)
+find_vmap_lowest_linear_match(struct maple_tree *tree, unsigned long size,
+			      unsigned long align, unsigned long vstart)
 {
+	MA_STATE(mas, tree, vstart, vstart);
 	struct vmap_area *va;
 
-	list_for_each_entry(va, head, list) {
+	mas_set(&mas, vstart);
+	va = mas_find(&mas, ULONG_MAX);
+	while (va) {
 		if (!is_within_this_va(va, size, align, vstart))
-			continue;
-
-		return va;
+			va = mas_next(&mas, ULONG_MAX);
+		else
+			return va;
 	}
 
-	return NULL;
+	return va;
 }
 
 static void
-find_vmap_lowest_match_check(struct list_head *head, unsigned long size,
-			     unsigned long align)
+find_vmap_lowest_match_check(unsigned long size, unsigned long align)
 {
 	struct vmap_area *va_1, *va_2;
 	unsigned long vstart;
@@ -2015,11 +1659,8 @@ find_vmap_lowest_match_check(struct list_head *head, unsigned long size,
 	get_random_bytes(&rnd, sizeof(rnd));
 	vstart = VMALLOC_START + rnd;
 
-	if (free_mt_supported())
-		va_1 = find_vmap_lowest_match_mt(&free_vmap_area_mt, size, align, vstart);
-	else
-		va_1 = find_vmap_lowest_linear_match(head, size, align, vstart);
-	va_2 = find_vmap_lowest_linear_match(head, size, align, vstart);
+	va_1 = find_vmap_lowest_match_mt(&free_vmap_area_mt, size, align, vstart);
+	va_2 = find_vmap_lowest_linear_match(&free_vmap_area_mt, size, align, vstart);
 
 	if (va_1 != va_2)
 		pr_emerg("not lowest: t: 0x%p, l: 0x%p, v: 0x%lx\n",
@@ -2153,39 +1794,38 @@ va_clip(struct vmap_area *va, unsigned long nva_start_addr,
 	}
 
 	if (type != FL_FIT_TYPE) {
-		if (free_mt_supported())
-			free_mt_update_va_locked(va, old_start, old_end);
+		if (free_mt_supported() &&
+		    !free_mt_update_va_locked(va, old_start, old_end))
+			return -ENOMEM;
 
-		if (lva)	/* type == NE_FIT_TYPE */
-			insert_vmap_area_free_locked(lva);
+		if (lva && !insert_vmap_area_free_locked(lva)) {
+			kmem_cache_free(vmap_area_cachep, lva);
+			return -ENOMEM;
+		}
 	}
 
 	return 0;
 }
 
-static unsigned long
-va_alloc(struct vmap_area *va,
-		unsigned long size, unsigned long align,
-		unsigned long vstart, unsigned long vend)
+static __always_inline bool
+restore_allocated_vmap_range_free_locked(unsigned long start, unsigned long end)
 {
-	unsigned long nva_start_addr;
-	int ret;
+	struct vmap_area *va;
 
-	if (va->va_start > vstart)
-		nva_start_addr = ALIGN(va->va_start, align);
-	else
-		nva_start_addr = ALIGN(vstart, align);
+	lockdep_assert_held(&free_vmap_area_lock);
 
-	/* Check the "vend" restriction. */
-	if (nva_start_addr + size > vend)
-		return -ERANGE;
+	va = kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT);
+	if (!va)
+		return false;
 
-	/* Update the free vmap_area. */
-	ret = va_clip(va, nva_start_addr, size);
-	if (WARN_ON_ONCE(ret))
-		return ret;
+	va->va_start = start;
+	va->va_end = end;
+	if (!insert_vmap_area_free_locked(va)) {
+		kmem_cache_free(vmap_area_cachep, va);
+		return false;
+	}
 
-	return nva_start_addr;
+	return true;
 }
 
 /*
@@ -2196,34 +1836,42 @@ static __always_inline unsigned long
 __alloc_vmap_area(unsigned long size, unsigned long align,
 		  unsigned long vstart, unsigned long vend)
 {
+	int ret;
 	unsigned long nva_start_addr;
+	unsigned long nva_end_addr;
 	struct vmap_area *va;
 
 	lockdep_assert_held(&free_vmap_area_lock);
 
-	/*
-	 * Next-fit with wrap-around lowers repeated list-head scans in
-	 * high-churn workloads.
-	 */
-	va = find_vmap_match_list_next_fit(&free_vmap_area_list, size, align,
-					   vstart, vend);
+	try_init_occupied_mt_locked();
 
-	if (unlikely(!va))
+	if (WARN_ON_ONCE(!occupied_mt_supported()))
 		return -ENOENT;
 
-	nva_start_addr = va_alloc(va, size, align, vstart, vend);
-	if (!IS_ERR_VALUE(nva_start_addr)) {
-		unsigned long next_hint;
+	nva_start_addr = occupied_mt_find_hole_lowest_locked(size, align,
+							     vstart, vend);
+	if (IS_ERR_VALUE(nva_start_addr))
+		return nva_start_addr;
+	nva_end_addr = nva_start_addr + size;
 
-		if (check_add_overflow(nva_start_addr, size, &next_hint))
-			free_vmap_alloc_hint = vstart;
-		else
-			free_vmap_alloc_hint = next_hint;
+	va = __find_vmap_area_mt(nva_start_addr, &free_vmap_area_mt);
+	if (WARN_ON_ONCE(!va))
+		return -ENOENT;
+
+	ret = va_clip(va, nva_start_addr, size);
+	if (WARN_ON_ONCE(ret))
+		return ret;
+
+	if (!occupied_mt_store_range_locked(nva_start_addr, nva_end_addr)) {
+		bool restored;
+
+		restored = restore_allocated_vmap_range_free_locked(nva_start_addr, nva_end_addr);
+		WARN_ON_ONCE(!restored);
+		return -ENOMEM;
 	}
 
 #if DEBUG_AUGMENT_LOWEST_MATCH_CHECK
-	if (!IS_ERR_VALUE(nva_start_addr))
-		find_vmap_lowest_match_check(&free_vmap_area_list, size, align);
+	find_vmap_lowest_match_check(size, align);
 #endif
 
 	return nva_start_addr;
@@ -2247,7 +1895,8 @@ static void free_vmap_area(struct vmap_area *va)
 	 * Insert/Merge it back to the free tree/list.
 	 */
 	spin_lock(&free_vmap_area_lock);
-	merge_or_add_vmap_area_free_locked(va);
+	WARN_ON_ONCE(!occupied_mt_erase_va_locked(va));
+	WARN_ON_ONCE(!merge_or_add_vmap_area_free_locked(va));
 	spin_unlock(&free_vmap_area_lock);
 }
 
@@ -2566,24 +2215,36 @@ static DEFINE_MUTEX(vmap_purge_lock);
 /* for per-CPU blocks */
 static void purge_fragmented_blocks_allcpus(void);
 
-static void
-reclaim_list_global(struct list_head *head)
+static bool
+reclaim_list_global(struct list_head *head, bool erase_occupied,
+		    struct list_head *failed)
 {
 	struct vmap_area *va, *n;
+	bool ok = true;
 
 	if (list_empty(head))
-		return;
+		return true;
 
 	spin_lock(&free_vmap_area_lock);
-	list_for_each_entry_safe(va, n, head, list)
-		merge_or_add_vmap_area_free_locked(va);
+	list_for_each_entry_safe(va, n, head, list) {
+		list_del_init(&va->list);
+		if (erase_occupied)
+			WARN_ON_ONCE(!occupied_mt_erase_va_locked(va));
+		if (WARN_ON_ONCE(!merge_or_add_vmap_area_free_locked(va))) {
+			list_add_tail(&va->list, failed);
+			ok = false;
+		}
+	}
 	spin_unlock(&free_vmap_area_lock);
+
+	return ok;
 }
 
 static void
 decay_va_pool_node(struct vmap_node *vn, bool full_decay)
 {
 	LIST_HEAD(decay_list);
+	LIST_HEAD(decay_failed);
 	struct vmap_area *va, *nva;
 	unsigned long n_decay, pool_len;
 	int i;
@@ -2612,7 +2273,7 @@ decay_va_pool_node(struct vmap_node *vn, bool full_decay)
 				break;
 
 			list_del_init(&va->list);
-			merge_or_add_vmap_area(va, &decay_list);
+			list_add_tail(&va->list, &decay_list);
 		}
 
 		/*
@@ -2629,7 +2290,11 @@ decay_va_pool_node(struct vmap_node *vn, bool full_decay)
 		}
 	}
 
-	reclaim_list_global(&decay_list);
+	WARN_ON_ONCE(!reclaim_list_global(&decay_list, false, &decay_failed));
+	list_for_each_entry_safe(va, nva, &decay_failed, list) {
+		list_del_init(&va->list);
+		WARN_ON_ONCE(!node_pool_add_va(vn, va));
+	}
 }
 
 #define KASAN_RELEASE_BATCH_SIZE 32
@@ -2664,8 +2329,10 @@ static void purge_vmap_node(struct work_struct *work)
 	struct vmap_node *vn = container_of(work,
 		struct vmap_node, purge_work);
 	unsigned long nr_purged_pages = 0;
+	unsigned long nr_failed_pages = 0;
 	struct vmap_area *va, *n_va;
 	LIST_HEAD(local_list);
+	LIST_HEAD(local_failed);
 
 	if (IS_ENABLED(CONFIG_KASAN_VMALLOC))
 		kasan_release_vmalloc_node(vn);
@@ -2691,7 +2358,23 @@ static void purge_vmap_node(struct work_struct *work)
 
 	atomic_long_sub(nr_purged_pages, &vmap_lazy_nr);
 
-	reclaim_list_global(&local_list);
+	WARN_ON_ONCE(!reclaim_list_global(&local_list, false, &local_failed));
+	list_for_each_entry_safe(va, n_va, &local_failed, list) {
+		unsigned int vn_id = decode_vn_id(va->flags);
+		struct vmap_node *dst;
+
+		list_del_init(&va->list);
+		dst = is_vn_id_valid(vn_id) ?
+			id_to_node(vn_id) : addr_to_node(va->va_start);
+
+		spin_lock(&dst->lazy.lock);
+		insert_vmap_area_lazy_locked(va, dst);
+		spin_unlock(&dst->lazy.lock);
+		nr_failed_pages += va_size(va) >> PAGE_SHIFT;
+	}
+
+	if (nr_failed_pages)
+		atomic_long_add(nr_failed_pages, &vmap_lazy_nr);
 }
 
 /*
@@ -2823,6 +2506,15 @@ static void free_vmap_area_noflush(struct vmap_area *va)
 	vn = is_vn_id_valid(vn_id) ?
 		id_to_node(vn_id):addr_to_node(va->va_start);
 
+	/*
+	 * Drop occupied-range visibility as soon as the area is freed, even
+	 * though coalescing/reinsertion into the free index remains deferred.
+	 */
+	spin_lock(&free_vmap_area_lock);
+	try_init_occupied_mt_locked();
+	WARN_ON_ONCE(!occupied_mt_erase_va_locked(va));
+	spin_unlock(&free_vmap_area_lock);
+
 	spin_lock(&vn->lazy.lock);
 	insert_vmap_area_lazy_locked(va, vn);
 	spin_unlock(&vn->lazy.lock);
@@ -5240,14 +4932,11 @@ EXPORT_SYMBOL_GPL(free_vm_area);
 
 #ifdef CONFIG_SMP
 static __always_inline struct vmap_area *
-free_vmap_area_prev(struct vmap_area *va)
+free_vmap_area_prev_by_addr(unsigned long addr)
 {
 	lockdep_assert_held(&free_vmap_area_lock);
 
-	if (va->list.prev == &free_vmap_area_list)
-		return NULL;
-
-	return list_entry(va->list.prev, struct vmap_area, list);
+	return __find_vmap_area_enclose_addr_mt(addr, &free_vmap_area_mt);
 }
 
 /**
@@ -5262,19 +4951,9 @@ free_vmap_area_prev(struct vmap_area *va)
 static struct vmap_area *
 pvm_find_va_enclose_addr(unsigned long addr)
 {
-	struct vmap_area *va;
-
 	lockdep_assert_held(&free_vmap_area_lock);
 
-	if (free_mt_supported())
-		return __find_vmap_area_enclose_addr_mt(addr, &free_vmap_area_mt);
-
-	list_for_each_entry_reverse(va, &free_vmap_area_list, list) {
-		if (va->va_start <= addr)
-			return va;
-	}
-
-	return NULL;
+	return __find_vmap_area_enclose_addr_mt(addr, &free_vmap_area_mt);
 }
 
 /**
@@ -5293,13 +4972,19 @@ pvm_determine_end_from_reverse(struct vmap_area **va, unsigned long align)
 	unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
 	unsigned long addr;
 
+	lockdep_assert_held(&free_vmap_area_lock);
+
 	if (likely(*va)) {
-		list_for_each_entry_from_reverse((*va),
-				&free_vmap_area_list, list) {
+		do {
 			addr = min((*va)->va_end & ~(align - 1), vmalloc_end);
 			if ((*va)->va_start < addr)
 				return addr;
-		}
+
+			if ((*va)->va_start == 0)
+				break;
+
+			*va = free_vmap_area_prev_by_addr((*va)->va_start - 1);
+		} while (*va);
 	}
 
 	return 0;
@@ -5382,6 +5067,8 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
 	}
 retry:
 	spin_lock(&free_vmap_area_lock);
+	try_init_free_mt_locked();
+	try_init_occupied_mt_locked();
 
 	/* start scanning - we scan from the top, begin with the last area */
 	area = term_area = last_area;
@@ -5419,7 +5106,10 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
 		 * If this VA does not fit, move base downwards and recheck.
 		 */
 		if (base + start < va->va_start) {
-			va = free_vmap_area_prev(va);
+			if (va->va_start == 0)
+				va = NULL;
+			else
+				va = free_vmap_area_prev_by_addr(va->va_start - 1);
 			base = pvm_determine_end_from_reverse(&va, align) - end;
 			term_area = area;
 			continue;
@@ -5459,6 +5149,12 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
 		va = vas[area];
 		va->va_start = start;
 		va->va_end = start + size;
+
+		if (occupied_mt_supported() &&
+		    !occupied_mt_store_range_locked(va->va_start, va->va_end)) {
+			area++;
+			goto recovery;
+		}
 	}
 
 	spin_unlock(&free_vmap_area_lock);
@@ -5501,11 +5197,14 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
 	while (area--) {
 		orig_start = vas[area]->va_start;
 		orig_end = vas[area]->va_end;
+		WARN_ON_ONCE(!occupied_mt_erase_va_locked(vas[area]));
 		va = merge_or_add_vmap_area_free_locked(vas[area]);
+		WARN_ON_ONCE(!va);
 		if (va)
 			kasan_release_vmalloc(orig_start, orig_end,
-				va->va_start, va->va_end,
-				KASAN_VMALLOC_PAGE_RANGE | KASAN_VMALLOC_TLB_FLUSH);
+					      va->va_start, va->va_end,
+					      KASAN_VMALLOC_PAGE_RANGE |
+					      KASAN_VMALLOC_TLB_FLUSH);
 		vas[area] = NULL;
 	}
 
@@ -5551,7 +5250,9 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
 	for (area = 0; area < nr_vms; area++) {
 		orig_start = vas[area]->va_start;
 		orig_end = vas[area]->va_end;
+		WARN_ON_ONCE(!occupied_mt_erase_va_locked(vas[area]));
 		va = merge_or_add_vmap_area_free_locked(vas[area]);
+		WARN_ON_ONCE(!va);
 		if (va)
 			kasan_release_vmalloc(orig_start, orig_end,
 				va->va_start, va->va_end,
@@ -5598,7 +5299,7 @@ bool vmalloc_dump_obj(void *object)
 	if (!spin_trylock(&vn->busy.lock))
 		return false;
 
-	va = __find_vmap_area(addr, &vn->busy.root);
+	va = find_vmap_area_busy_locked(addr, vn);
 	if (!va || !va->vm) {
 		spin_unlock(&vn->busy.lock);
 		return false;
@@ -5650,11 +5351,17 @@ static void show_purge_info(struct seq_file *m)
 
 	for_each_vmap_node(vn) {
 		spin_lock(&vn->lazy.lock);
-		list_for_each_entry(va, &vn->lazy.head, list) {
-			seq_printf(m, "0x%pK-0x%pK %7ld unpurged vm_area\n",
-				(void *)va->va_start, (void *)va->va_end,
-				va_size(va));
+		try_init_lazy_mt_locked(vn);
+		if (WARN_ON_ONCE(!vn->lazy.mt_enabled)) {
+			spin_unlock(&vn->lazy.lock);
+			continue;
 		}
+		MA_STATE(mas, &vn->lazy.mt, 0, 0);
+
+		mas_for_each(&mas, va, ULONG_MAX)
+			seq_printf(m, "0x%pK-0x%pK %7ld unpurged vm_area\n",
+				   (void *)va->va_start, (void *)va->va_end,
+				   va_size(va));
 		spin_unlock(&vn->lazy.lock);
 	}
 }
@@ -5671,12 +5378,19 @@ static int vmalloc_info_show(struct seq_file *m, void *p)
 
 	for_each_vmap_node(vn) {
 		spin_lock(&vn->busy.lock);
-		list_for_each_entry(va, &vn->busy.head, list) {
+		try_init_busy_mt_locked(vn);
+		if (WARN_ON_ONCE(!vn->busy.mt_enabled)) {
+			spin_unlock(&vn->busy.lock);
+			continue;
+		}
+		MA_STATE(mas, &vn->busy.mt, 0, 0);
+
+		mas_for_each(&mas, va, ULONG_MAX) {
 			if (!va->vm) {
 				if (va->flags & VMAP_RAM)
 					seq_printf(m, "0x%pK-0x%pK %7ld vm_map_ram\n",
-						(void *)va->va_start, (void *)va->va_end,
-						va_size(va));
+						   (void *)va->va_start, (void *)va->va_end,
+						   va_size(va));
 
 				continue;
 			}
@@ -5689,7 +5403,7 @@ static int vmalloc_info_show(struct seq_file *m, void *p)
 			smp_rmb();
 
 			seq_printf(m, "0x%pK-0x%pK %7ld",
-				v->addr, v->addr + v->size, v->size);
+				   v->addr, v->addr + v->size, v->size);
 
 			if (v->caller)
 				seq_printf(m, " %pS", v->caller);
@@ -5754,6 +5468,8 @@ static void __init vmap_init_free_space(void)
 	struct vmap_area *free;
 	struct vm_struct *busy;
 
+	spin_lock(&free_vmap_area_lock);
+
 	/*
 	 *     B     F     B     B     B     F
 	 * -|-----|.....|-----|-----|-----|.....|-
@@ -5761,19 +5477,18 @@ static void __init vmap_init_free_space(void)
 	 *  |<--------------------------------->|
 	 */
 	for (busy = vmlist; busy; busy = busy->next) {
-		if ((unsigned long) busy->addr - vmap_start > 0) {
+		if ((unsigned long)busy->addr - vmap_start > 0) {
 			free = kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT);
 			if (!WARN_ON_ONCE(!free)) {
 				free->va_start = vmap_start;
-				free->va_end = (unsigned long) busy->addr;
+				free->va_end = (unsigned long)busy->addr;
 
-				insert_vmap_area_augment(free, NULL,
-					&free_vmap_area_root,
-						&free_vmap_area_list);
+				if (WARN_ON_ONCE(!insert_vmap_area_free_locked(free)))
+					kmem_cache_free(vmap_area_cachep, free);
 			}
 		}
 
-		vmap_start = (unsigned long) busy->addr + busy->size;
+		vmap_start = (unsigned long)busy->addr + busy->size;
 	}
 
 	if (vmap_end - vmap_start > 0) {
@@ -5782,11 +5497,12 @@ static void __init vmap_init_free_space(void)
 			free->va_start = vmap_start;
 			free->va_end = vmap_end;
 
-			insert_vmap_area_augment(free, NULL,
-				&free_vmap_area_root,
-					&free_vmap_area_list);
+			if (WARN_ON_ONCE(!insert_vmap_area_free_locked(free)))
+				kmem_cache_free(vmap_area_cachep, free);
 		}
 	}
+
+	spin_unlock(&free_vmap_area_lock);
 }
 
 static void vmap_init_nodes(void)
@@ -5825,13 +5541,15 @@ static void vmap_init_nodes(void)
 #endif
 
 	for_each_vmap_node(vn) {
-		vn->busy.root = RB_ROOT;
-		INIT_LIST_HEAD(&vn->busy.head);
 		spin_lock_init(&vn->busy.lock);
+		mt_init_flags(&vn->busy.mt, MT_FLAGS_LOCK_EXTERN);
+		mt_set_external_lock(&vn->busy.mt, &vn->busy.lock);
+		vn->busy.mt_enabled = true;
 
-		vn->lazy.root = RB_ROOT;
-		INIT_LIST_HEAD(&vn->lazy.head);
 		spin_lock_init(&vn->lazy.lock);
+		mt_init_flags(&vn->lazy.mt, MT_FLAGS_LOCK_EXTERN);
+		mt_set_external_lock(&vn->lazy.mt, &vn->lazy.lock);
+		vn->lazy.mt_enabled = true;
 
 		for (i = 0; i < MAX_VA_SIZE_PAGES; i++) {
 			INIT_LIST_HEAD(&vn->pool[i].head);
@@ -5881,6 +5599,11 @@ void __init vmalloc_init(void)
 	 * Create the cache for vmap_area objects.
 	 */
 	vmap_area_cachep = KMEM_CACHE(vmap_area, SLAB_PANIC);
+	/*
+	 * vmalloc_init() performs Maple stores/preallocation while importing
+	 * early ranges. Ensure Maple node cache is available at this stage.
+	 */
+	maple_tree_init();
 
 	for_each_possible_cpu(i) {
 		struct vmap_block_queue *vbq;
@@ -5911,7 +5634,15 @@ void __init vmalloc_init(void)
 		va->vm = tmp;
 
 		vn = addr_to_node(va->va_start);
-		insert_vmap_area(va, &vn->busy.root, &vn->busy.head);
+		spin_lock(&vn->busy.lock);
+		insert_vmap_area_busy_locked(va, vn);
+		spin_unlock(&vn->busy.lock);
+
+		spin_lock(&free_vmap_area_lock);
+		try_init_occupied_mt_locked();
+		WARN_ON_ONCE(!occupied_mt_store_range_locked(va->va_start,
+							     va->va_end));
+		spin_unlock(&free_vmap_area_lock);
 	}
 
 	/*

-- 
2.34.1



  parent reply	other threads:[~2026-06-13 17:21 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-06-13 17:19 [PATCH RFC 00/12] mm/vmalloc: migrate vmap_area indexing from rb-tree to maple-tree Pranjal Arya
2026-06-13 17:19 ` [PATCH RFC 01/12] mm/vmalloc: introduce maple_tree-based indexing for vmap_area Pranjal Arya
2026-06-13 17:19 ` [PATCH RFC 02/12] mm/vmalloc: convert allocation-side gap finding and insertion to maple_tree Pranjal Arya
2026-06-13 17:19 ` [PATCH RFC 03/12] mm/vmalloc: convert free, purge, and pcpu paths " Pranjal Arya
2026-06-13 17:19 ` Pranjal Arya [this message]
2026-06-13 17:19 ` [PATCH RFC 05/12] mm/vmalloc: tighten failure handling under memory pressure Pranjal Arya
2026-06-13 17:19 ` [PATCH RFC 06/12] mm/vmalloc: tighten alloc/free hot paths Pranjal Arya
2026-06-13 17:19 ` [PATCH RFC 07/12] mm/vmalloc: consolidate occupied tree as authoritative index on hot path Pranjal Arya
2026-06-13 17:19 ` [PATCH RFC 08/12] mm/vmalloc: track lazy-purge queue as a list_head Pranjal Arya
2026-06-13 17:19 ` [PATCH RFC 09/12] mm/vmalloc: collapse busy-tree find-then-unlink into a single mas_erase Pranjal Arya
2026-06-13 17:19 ` [PATCH RFC 10/12] mm/vmalloc: per-CPU caching of free ranges from the maple_tree allocator Pranjal Arya
2026-06-13 17:19 ` [PATCH RFC 11/12] mm/vmalloc: O(1) lookup of cached vmap_areas with bounded fast-reject Pranjal Arya
2026-06-13 17:19 ` [PATCH RFC 12/12] mm/vmalloc: harden bump-allocator alloc/free against UBSAN array bounds Pranjal Arya
2026-06-13 23:15 ` [PATCH RFC 00/12] mm/vmalloc: migrate vmap_area indexing from rb-tree to maple-tree Matthew Wilcox
2026-06-14  6:35 ` [syzbot ci] " syzbot ci
2026-06-14  6:58 ` [PATCH RFC 00/12] " Uladzislau Rezki

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260613-vmalloc_maple-v1-4-0aa740bb944b@oss.qualcomm.com \
    --to=pranjal.arya@oss.qualcomm.com \
    --cc=Suzuki.Poulose@arm.com \
    --cc=akpm@linux-foundation.org \
    --cc=aliceryhl@google.com \
    --cc=andrewjballance@gmail.com \
    --cc=balbirs@nvidia.com \
    --cc=dev.jain@arm.com \
    --cc=dvyukov@google.com \
    --cc=elver@google.com \
    --cc=glider@google.com \
    --cc=jackmanb@google.com \
    --cc=liam@infradead.org \
    --cc=linux-arm-msm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=ljs@kernel.org \
    --cc=maple-tree@lists.infradead.org \
    --cc=neil.armstrong@linaro.org \
    --cc=praan@google.com \
    --cc=puranjay@kernel.org \
    --cc=santosh.shukla@amd.com \
    --cc=shuah@kernel.org \
    --cc=smostafa@google.com \
    --cc=sudeep.holla@kernel.org \
    --cc=surenb@google.com \
    --cc=urezki@gmail.com \
    --cc=will@kernel.org \
    --cc=wkarny@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox