From: Pranjal Arya <pranjal.arya@oss.qualcomm.com>
To: Andrew Morton <akpm@linux-foundation.org>,
Uladzislau Rezki <urezki@gmail.com>,
"Liam R. Howlett" <liam@infradead.org>,
Alice Ryhl <aliceryhl@google.com>,
Andrew Ballance <andrewjballance@gmail.com>
Cc: linux-arm-msm@vger.kernel.org, linux-mm@kvack.org,
linux-kernel@vger.kernel.org, maple-tree@lists.infradead.org,
Lorenzo Stoakes <ljs@kernel.org>,
Pranjal Shrivastava <praan@google.com>,
Will Deacon <will@kernel.org>,
Suzuki K Poulose <Suzuki.Poulose@arm.com>,
Neil Armstrong <neil.armstrong@linaro.org>,
Mostafa Saleh <smostafa@google.com>,
Balbir Singh <balbirs@nvidia.com>,
Suren Baghdasaryan <surenb@google.com>,
Marco Elver <elver@google.com>,
Dmitry Vyukov <dvyukov@google.com>,
Alexander Potapenko <glider@google.com>,
Shuah Khan <shuah@kernel.org>, Dev Jain <dev.jain@arm.com>,
Brendan Jackman <jackmanb@google.com>,
Puranjay Mohan <puranjay@kernel.org>,
Santosh Shukla <santosh.shukla@amd.com>,
Wyes Karny <wkarny@gmail.com>,
Pranjal Arya <pranjal.arya@oss.qualcomm.com>,
Sudeep Holla <sudeep.holla@kernel.org>
Subject: [PATCH RFC 04/12] mm/vmalloc: finalize maple-only indexing and shrink struct vmap_area
Date: Sat, 13 Jun 2026 22:49:46 +0530 [thread overview]
Message-ID: <20260613-vmalloc_maple-v1-4-0aa740bb944b@oss.qualcomm.com> (raw)
In-Reply-To: <20260613-vmalloc_maple-v1-0-0aa740bb944b@oss.qualcomm.com>
With every alloc/free/pcpu path now driven by maple_tree, retire the
augmented rb_tree from struct vmap_area and from include/linux/vmalloc.h.
The struct shrinks by 24 bytes on both x86_64 and arm64 (72 -> 48):
- struct rb_node rb_node;
- struct list_head list;
- union {
- unsigned long subtree_max_size;
- struct vm_struct *vm;
- };
+ struct list_head list;
+ struct vm_struct *vm;
Also allow maple_tree_init() to be called twice: vmalloc_init() runs
before start_kernel() reaches its own maple_tree_init() callsite,
and the maple_tree machinery needs to be live for the early seeding
done in vmap_init_free_space(). The second call becomes a no-op
once maple_node_cache is set.
Signed-off-by: Pranjal Arya <pranjal.arya@oss.qualcomm.com>
---
include/linux/vmalloc.h | 16 +-
lib/maple_tree.c | 7 +
mm/vmalloc.c | 1123 ++++++++++++++++++-----------------------------
3 files changed, 436 insertions(+), 710 deletions(-)
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index d87dc7f77f4e..642bca92b804 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -9,7 +9,6 @@
#include <linux/list.h>
#include <linux/llist.h>
#include <asm/page.h> /* pgprot_t */
-#include <linux/rbtree.h>
#include <linux/overflow.h>
#include <asm/vmalloc.h>
@@ -72,19 +71,8 @@ struct vmap_area {
unsigned long va_start;
unsigned long va_end;
- struct rb_node rb_node; /* address sorted rbtree */
- struct list_head list; /* address sorted list */
-
- /*
- * The following two variables can be packed, because
- * a vmap_area object can be either:
- * 1) in "free" tree (root is free_vmap_area_root)
- * 2) or "busy" tree (root is vmap_area_root)
- */
- union {
- unsigned long subtree_max_size; /* in "free" tree */
- struct vm_struct *vm; /* in "busy" tree */
- };
+ struct list_head list; /* auxiliary linkage (pool/purge/lazy) */
+ struct vm_struct *vm;
unsigned long flags; /* mark type of vm_map_ram area */
};
diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index e52876435b77..f3474a107372 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -5634,6 +5634,13 @@ void __init maple_tree_init(void)
.sheaf_capacity = 32,
};
+ /*
+ * vmalloc_init() may need Maple allocations before start_kernel() reaches
+ * its own maple_tree_init() callsite. Keep initialization idempotent.
+ */
+ if (maple_node_cache)
+ return;
+
maple_node_cache = kmem_cache_create("maple_node",
sizeof(struct maple_node), &args,
SLAB_PANIC);
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index f2117eafa9cf..c908c1a0fcd4 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -23,7 +23,6 @@
#include <linux/kallsyms.h>
#include <linux/list.h>
#include <linux/notifier.h>
-#include <linux/rbtree.h>
#include <linux/maple_tree.h>
#include <linux/xarray.h>
#include <linux/io.h>
@@ -36,7 +35,6 @@
#include <linux/llist.h>
#include <linux/uio.h>
#include <linux/bitops.h>
-#include <linux/rbtree_augmented.h>
#include <linux/overflow.h>
#include <linux/pgtable.h>
#include <linux/hugetlb.h>
@@ -881,22 +879,20 @@ static bool vmap_initialized __read_mostly;
static struct kmem_cache *vmap_area_cachep;
/*
- * This linked list stores free areas sorted by start address.
- * It gives O(1) access to neighbors for fast coalescing.
+ * Maple tree of free ranges.
*/
-static LIST_HEAD(free_vmap_area_list);
-/* Next-fit hint to avoid scanning from list head on each allocation. */
-static unsigned long free_vmap_alloc_hint __maybe_unused = 1;
-
-/*
- * Maple tree shadow of free_vmap_area_list. It is used for
- * address lookups and first-fit scans.
- */
-static struct rb_root free_vmap_area_root = RB_ROOT;
-static struct maple_tree free_vmap_area_mt __maybe_unused =
- MTREE_INIT_EXT(free_vmap_area_mt, MT_FLAGS_LOCK_EXTERN, free_vmap_area_lock);
-static bool free_vmap_area_mt_enabled __maybe_unused;
-static bool free_vmap_area_mt_init_tried __maybe_unused;
+static struct maple_tree free_vmap_area_mt =
+ MTREE_INIT_EXT(free_vmap_area_mt,
+ MT_FLAGS_LOCK_EXTERN | MT_FLAGS_ALLOC_RANGE,
+ free_vmap_area_lock);
+static bool free_vmap_area_mt_enabled;
+static bool free_vmap_area_mt_init_tried;
+static struct maple_tree occupied_vmap_area_mt =
+ MTREE_INIT_EXT(occupied_vmap_area_mt,
+ MT_FLAGS_LOCK_EXTERN | MT_FLAGS_ALLOC_RANGE,
+ free_vmap_area_lock);
+static bool occupied_vmap_area_mt_enabled;
+static bool occupied_vmap_area_mt_init_tried;
/*
* Preload a CPU with one object for "no edge" split case. The
@@ -905,19 +901,11 @@ static bool free_vmap_area_mt_init_tried __maybe_unused;
*/
static DEFINE_PER_CPU(struct vmap_area *, ne_fit_preload_node);
-/*
- * This structure defines a single, solid model where a list and
- * maple tree are part of one entity protected by the lock. Nodes are
- * sorted in ascending order, thus for O(1) access to left/right
- * neighbors a list is used as well as for sequential traversal.
- */
+/* Per-node ordered range index backed by Maple Tree. */
struct mt_list {
- struct rb_root root;
struct maple_tree mt;
- struct list_head head;
spinlock_t lock;
bool mt_enabled;
- bool mt_init_tried;
};
/*
@@ -1055,22 +1043,6 @@ va_size(struct vmap_area *va)
return (va->va_end - va->va_start);
}
-/*
- * Transitional rb compatibility retained until all rb-only users are moved.
- * Follow-up patches in this RFC series remove these helpers.
- */
-static __always_inline unsigned long
-get_subtree_max_size(struct rb_node *node)
-{
- struct vmap_area *va;
-
- va = rb_entry_safe(node, struct vmap_area, rb_node);
- return va ? va->subtree_max_size : 0;
-}
-
-RB_DECLARE_CALLBACKS_MAX(static, free_vmap_area_rb_augment_cb,
- struct vmap_area, rb_node, unsigned long, subtree_max_size, va_size)
-
static void reclaim_and_purge_vmap_areas(void);
static BLOCKING_NOTIFIER_HEAD(vmap_notify_list);
static void drain_vmap_area_work(struct work_struct *work);
@@ -1078,31 +1050,12 @@ static DECLARE_WORK(drain_vmap_work, drain_vmap_area_work);
static __cacheline_aligned_in_smp atomic_long_t vmap_lazy_nr;
-/*
- * maple nodes are allocated from slab; defer tree population until
- * slab allocator is up to avoid early-boot failures.
- */
-static __always_inline bool vmap_mt_runtime_ready(void)
-{
- return READ_ONCE(vmap_initialized) && slab_is_available();
-}
-
static __always_inline bool free_mt_supported(void)
{
return free_vmap_area_mt_enabled;
}
-static __always_inline void disable_free_mt_locked(void)
-{
- lockdep_assert_held(&free_vmap_area_lock);
-
- if (free_vmap_area_mt_enabled) {
- __mt_destroy(&free_vmap_area_mt);
- free_vmap_area_mt_enabled = false;
- }
-}
-
-static __always_inline void free_mt_store_va_locked(struct vmap_area *va)
+static __always_inline bool free_mt_store_va_locked(struct vmap_area *va)
{
int err;
@@ -1110,12 +1063,20 @@ static __always_inline void free_mt_store_va_locked(struct vmap_area *va)
MA_STATE(mas, &free_vmap_area_mt, va->va_start, va->va_end - 1);
- err = mas_store_gfp(&mas, va, GFP_ATOMIC | __GFP_NOWARN);
- if (WARN_ON_ONCE(err))
- disable_free_mt_locked();
+ err = mas_preallocate(&mas, va, GFP_NOWAIT | __GFP_NOWARN);
+ if (!err) {
+ mas_store_prealloc(&mas, va);
+ mas_destroy(&mas);
+ } else {
+ err = mas_store_gfp(&mas, va, GFP_ATOMIC | __GFP_NOWARN);
+ if (WARN_ON_ONCE(err))
+ return false;
+ }
+
+ return true;
}
-static __always_inline void free_mt_erase_va_locked(struct vmap_area *va)
+static __always_inline bool free_mt_erase_va_locked(struct vmap_area *va)
{
int err;
@@ -1125,10 +1086,12 @@ static __always_inline void free_mt_erase_va_locked(struct vmap_area *va)
err = mas_store_gfp(&mas, NULL, GFP_ATOMIC | __GFP_NOWARN);
if (WARN_ON_ONCE(err))
- disable_free_mt_locked();
+ return false;
+
+ return true;
}
-static __always_inline void
+static __always_inline bool
free_mt_update_va_locked(struct vmap_area *va, unsigned long old_start,
unsigned long old_end)
{
@@ -1140,35 +1103,14 @@ free_mt_update_va_locked(struct vmap_area *va, unsigned long old_start,
MA_STATE(mas_store, &free_vmap_area_mt, va->va_start, va->va_end - 1);
err = mas_store_gfp(&mas_erase, NULL, GFP_ATOMIC | __GFP_NOWARN);
- if (WARN_ON_ONCE(err)) {
- disable_free_mt_locked();
- return;
- }
+ if (WARN_ON_ONCE(err))
+ return false;
err = mas_store_gfp(&mas_store, va, GFP_ATOMIC | __GFP_NOWARN);
if (WARN_ON_ONCE(err))
- disable_free_mt_locked();
-}
-
-static void free_mt_rebuild_locked(void)
-{
- struct vmap_area *va;
- int err;
-
- lockdep_assert_held(&free_vmap_area_lock);
-
- __mt_destroy(&free_vmap_area_mt);
- free_vmap_area_mt_enabled = true;
-
- list_for_each_entry(va, &free_vmap_area_list, list) {
- MA_STATE(mas, &free_vmap_area_mt, va->va_start, va->va_end - 1);
+ return false;
- err = mas_store_gfp(&mas, va, GFP_ATOMIC | __GFP_NOWARN);
- if (WARN_ON_ONCE(err)) {
- disable_free_mt_locked();
- return;
- }
- }
+ return true;
}
static __always_inline void try_init_free_mt_locked(void)
@@ -1178,257 +1120,101 @@ static __always_inline void try_init_free_mt_locked(void)
if (free_vmap_area_mt_init_tried)
return;
- if (!vmap_mt_runtime_ready())
+ if (!slab_is_available())
return;
free_vmap_area_mt_init_tried = true;
- free_mt_rebuild_locked();
-}
-
-static __always_inline struct vmap_area *
-__find_vmap_area_list(unsigned long addr, struct list_head *head)
-{
- struct vmap_area *va;
-
- addr = (unsigned long)kasan_reset_tag((void *)addr);
-
- list_for_each_entry(va, head, list) {
- if (addr < va->va_start)
- break;
- if (addr < va->va_end)
- return va;
- }
-
- return NULL;
-}
-
-static struct vmap_area *__find_vmap_area(unsigned long addr, struct rb_root *root)
-{
- struct rb_node *n = root->rb_node;
-
- addr = (unsigned long)kasan_reset_tag((void *)addr);
-
- while (n) {
- struct vmap_area *va;
-
- va = rb_entry(n, struct vmap_area, rb_node);
- if (addr < va->va_start)
- n = n->rb_left;
- else if (addr >= va->va_end)
- n = n->rb_right;
- else
- return va;
- }
-
- return NULL;
+ free_vmap_area_mt_enabled = true;
}
-/* Look up the first VA which satisfies addr < va_end, NULL if none. */
-static __always_inline struct vmap_area *
-__find_vmap_area_exceed_addr_list(unsigned long addr, struct list_head *head)
+static __always_inline bool occupied_mt_supported(void)
{
- struct vmap_area *va;
-
- addr = (unsigned long)kasan_reset_tag((void *)addr);
-
- list_for_each_entry(va, head, list) {
- if (va->va_end > addr)
- return va;
- }
-
- return NULL;
+ return occupied_vmap_area_mt_enabled;
}
-static __always_inline struct list_head *
-find_vmap_area_insert_point_list(struct vmap_area *va, struct list_head *head)
+static __always_inline void try_init_occupied_mt_locked(void)
{
- struct vmap_area *tmp;
- struct list_head *next = head;
-
- list_for_each_entry(tmp, head, list) {
- if (tmp->va_start > va->va_start) {
- next = &tmp->list;
- break;
- }
- }
+ lockdep_assert_held(&free_vmap_area_lock);
- if (next != head) {
- tmp = list_entry(next, struct vmap_area, list);
- if (WARN_ON_ONCE(va->va_end > tmp->va_start))
- return NULL;
- }
+ if (occupied_vmap_area_mt_init_tried)
+ return;
- if (next->prev != head) {
- tmp = list_entry(next->prev, struct vmap_area, list);
- if (WARN_ON_ONCE(va->va_start < tmp->va_end))
- return NULL;
- }
+ if (!slab_is_available())
+ return;
- return next;
+ occupied_vmap_area_mt_init_tried = true;
+ occupied_vmap_area_mt_enabled = true;
}
-/*
- * Use maple-tree neighbour lookup to locate insertion point in O(log n),
- * while preserving sorted-list neighbour traversal.
- */
-static __always_inline struct list_head *
-find_vmap_area_insert_point_mt(struct vmap_area *va, struct maple_tree *tree,
- struct list_head *head)
+static __always_inline bool
+occupied_mt_store_range_locked(unsigned long start, unsigned long end)
{
- struct vmap_area *prev, *next;
- struct list_head *next_link;
+ int err;
- MA_STATE(mas, tree, va->va_start, va->va_start);
+ lockdep_assert_held(&free_vmap_area_lock);
- mas_set(&mas, va->va_start);
- next = mas_find(&mas, ULONG_MAX);
+ if (WARN_ON_ONCE(!occupied_mt_supported()))
+ return false;
- if (next) {
- if (WARN_ON_ONCE(next->va_start <= va->va_start))
- return NULL;
- if (WARN_ON_ONCE(va->va_end > next->va_start))
- return NULL;
- next_link = &next->list;
- } else {
- next_link = head;
- }
+ MA_STATE(mas, &occupied_vmap_area_mt, start, end - 1);
- if (next_link->prev != head) {
- prev = list_entry(next_link->prev, struct vmap_area, list);
- if (WARN_ON_ONCE(va->va_start < prev->va_end))
- return NULL;
+ err = mas_preallocate(&mas, XA_ZERO_ENTRY, GFP_NOWAIT | __GFP_NOWARN);
+ if (!err) {
+ mas_store_prealloc(&mas, XA_ZERO_ENTRY);
+ mas_destroy(&mas);
+ return true;
}
- return next_link;
+ err = mas_store_gfp(&mas, XA_ZERO_ENTRY, GFP_ATOMIC | __GFP_NOWARN);
+ return !WARN_ON_ONCE(err);
}
static __always_inline bool
-insert_vmap_area_list_sorted(struct vmap_area *va, struct list_head *head)
+occupied_mt_erase_range_locked(unsigned long start, unsigned long end)
{
- struct list_head *next;
-
- next = find_vmap_area_insert_point_list(va, head);
- if (!next)
- return false;
-
- list_add_tail(&va->list, next);
- return true;
-}
+ int err;
-static __always_inline bool
-insert_vmap_area_list_sorted_mt(struct vmap_area *va, struct maple_tree *tree,
- struct list_head *head)
-{
- struct list_head *next;
+ lockdep_assert_held(&free_vmap_area_lock);
- next = find_vmap_area_insert_point_mt(va, tree, head);
- if (!next)
+ if (WARN_ON_ONCE(!occupied_mt_supported()))
return false;
- list_add_tail(&va->list, next);
- return true;
-}
+ MA_STATE(mas, &occupied_vmap_area_mt, start, end - 1);
-static __always_inline void
-disable_busy_mt_locked(struct vmap_node *vn)
-{
- lockdep_assert_held(&vn->busy.lock);
-
- if (vn->busy.mt_enabled) {
- __mt_destroy(&vn->busy.mt);
- vn->busy.mt_enabled = false;
- }
-
- vn->busy.mt_init_tried = true;
-}
-
-static __always_inline void
-disable_lazy_mt_locked(struct vmap_node *vn)
-{
- lockdep_assert_held(&vn->lazy.lock);
-
- if (vn->lazy.mt_enabled) {
- __mt_destroy(&vn->lazy.mt);
- vn->lazy.mt_enabled = false;
- }
-
- vn->lazy.mt_init_tried = true;
+ err = mas_store_gfp(&mas, NULL, GFP_ATOMIC | __GFP_NOWARN);
+ return !WARN_ON_ONCE(err);
}
-static void
-busy_mt_rebuild_locked(struct vmap_node *vn)
+static __always_inline bool
+occupied_mt_erase_va_locked(struct vmap_area *va)
{
- struct vmap_area *va;
- int err;
-
- lockdep_assert_held(&vn->busy.lock);
-
- __mt_destroy(&vn->busy.mt);
- vn->busy.mt_enabled = true;
+ lockdep_assert_held(&free_vmap_area_lock);
- list_for_each_entry(va, &vn->busy.head, list) {
- MA_STATE(mas, &vn->busy.mt, va->va_start, va->va_end - 1);
+ if (!occupied_mt_supported())
+ return true;
- err = mas_store_gfp(&mas, va, GFP_ATOMIC | __GFP_NOWARN);
- if (WARN_ON_ONCE(err)) {
- disable_busy_mt_locked(vn);
- return;
- }
- }
+ return occupied_mt_erase_range_locked(va->va_start, va->va_end);
}
static __always_inline void
try_init_busy_mt_locked(struct vmap_node *vn)
{
lockdep_assert_held(&vn->busy.lock);
-
- if (vn->busy.mt_init_tried)
- return;
-
- if (!vmap_mt_runtime_ready())
- return;
-
- vn->busy.mt_init_tried = true;
- busy_mt_rebuild_locked(vn);
-}
-
-static void
-lazy_mt_rebuild_locked(struct vmap_node *vn)
-{
- struct vmap_area *va;
- int err;
-
- lockdep_assert_held(&vn->lazy.lock);
-
- __mt_destroy(&vn->lazy.mt);
- vn->lazy.mt_enabled = true;
-
- list_for_each_entry(va, &vn->lazy.head, list) {
- MA_STATE(mas, &vn->lazy.mt, va->va_start, va->va_end - 1);
-
- err = mas_store_gfp(&mas, va, GFP_ATOMIC | __GFP_NOWARN);
- if (WARN_ON_ONCE(err)) {
- disable_lazy_mt_locked(vn);
- return;
- }
- }
+ WARN_ON_ONCE(!vn->busy.mt_enabled);
}
static __always_inline void
try_init_lazy_mt_locked(struct vmap_node *vn)
{
lockdep_assert_held(&vn->lazy.lock);
-
- if (vn->lazy.mt_init_tried)
- return;
-
- if (!vmap_mt_runtime_ready())
- return;
-
- vn->lazy.mt_init_tried = true;
- lazy_mt_rebuild_locked(vn);
+ WARN_ON_ONCE(!vn->lazy.mt_enabled);
}
+/*
+ * Busy/lazy lookup paths remain lock-based to preserve pointer lifetime
+ * semantics.
+ */
+
static __always_inline struct vmap_area *
__find_vmap_area_mt(unsigned long addr, struct maple_tree *tree)
{
@@ -1462,6 +1248,24 @@ __find_vmap_area_enclose_addr_mt(unsigned long addr, struct maple_tree *tree)
return mas_find_rev(&mas, 0);
}
+static __always_inline bool
+validate_vmap_area_range_insert_mt_locked(struct maple_tree *tree,
+ unsigned long start,
+ unsigned long end)
+{
+ struct vmap_area *left, *right;
+
+ left = __find_vmap_area_enclose_addr_mt(start, tree);
+ if (left && WARN_ON_ONCE(left->va_end > start))
+ return false;
+
+ right = __find_vmap_area_exceed_addr_mt(start, tree);
+ if (right && WARN_ON_ONCE(right->va_start < end))
+ return false;
+
+ return true;
+}
+
static __always_inline struct vmap_area *
find_vmap_area_busy_locked(unsigned long addr, struct vmap_node *vn)
{
@@ -1469,10 +1273,10 @@ find_vmap_area_busy_locked(unsigned long addr, struct vmap_node *vn)
try_init_busy_mt_locked(vn);
- if (likely(vn->busy.mt_enabled))
- return __find_vmap_area_mt(addr, &vn->busy.mt);
+ if (WARN_ON_ONCE(!vn->busy.mt_enabled))
+ return NULL;
- return __find_vmap_area_list(addr, &vn->busy.head);
+ return __find_vmap_area_mt(addr, &vn->busy.mt);
}
static __always_inline struct vmap_area *
@@ -1482,10 +1286,10 @@ find_vmap_area_exceed_addr_busy_locked(unsigned long addr, struct vmap_node *vn)
try_init_busy_mt_locked(vn);
- if (likely(vn->busy.mt_enabled))
- return __find_vmap_area_exceed_addr_mt(addr, &vn->busy.mt);
+ if (WARN_ON_ONCE(!vn->busy.mt_enabled))
+ return NULL;
- return __find_vmap_area_exceed_addr_list(addr, &vn->busy.head);
+ return __find_vmap_area_exceed_addr_mt(addr, &vn->busy.mt);
}
/*
@@ -1544,22 +1348,27 @@ insert_vmap_area_busy_locked(struct vmap_area *va, struct vmap_node *vn)
try_init_busy_mt_locked(vn);
- if (likely(vn->busy.mt_enabled)) {
- MA_STATE(mas, &vn->busy.mt, va->va_start, va->va_end - 1);
+ if (WARN_ON_ONCE(!vn->busy.mt_enabled))
+ return;
- if (!insert_vmap_area_list_sorted_mt(va, &vn->busy.mt,
- &vn->busy.head))
- return;
+ if (!validate_vmap_area_range_insert_mt_locked(&vn->busy.mt,
+ va->va_start,
+ va->va_end))
+ return;
- err = mas_store_gfp(&mas, va, GFP_ATOMIC | __GFP_NOWARN);
- if (WARN_ON_ONCE(err))
- disable_busy_mt_locked(vn);
+ INIT_LIST_HEAD(&va->list);
+
+ MA_STATE(mas, &vn->busy.mt, va->va_start, va->va_end - 1);
+ err = mas_preallocate(&mas, va, GFP_NOWAIT | __GFP_NOWARN);
+ if (!err) {
+ mas_store_prealloc(&mas, va);
+ mas_destroy(&mas);
return;
}
- if (!insert_vmap_area_list_sorted(va, &vn->busy.head))
- return;
+ err = mas_store_gfp(&mas, va, GFP_ATOMIC | __GFP_NOWARN);
+ WARN_ON_ONCE(err);
}
static __always_inline void
@@ -1569,18 +1378,17 @@ unlink_vmap_area_busy_locked(struct vmap_area *va, struct vmap_node *vn)
lockdep_assert_held(&vn->busy.lock);
- MA_STATE(mas, &vn->busy.mt, va->va_start, va->va_end - 1);
-
- list_del_init(&va->list);
-
try_init_busy_mt_locked(vn);
- if (unlikely(!vn->busy.mt_enabled))
+ if (WARN_ON_ONCE(!vn->busy.mt_enabled))
return;
+ MA_STATE(mas, &vn->busy.mt, va->va_start, va->va_end - 1);
+
err = mas_store_gfp(&mas, NULL, GFP_ATOMIC | __GFP_NOWARN);
- if (WARN_ON_ONCE(err))
- disable_busy_mt_locked(vn);
+ WARN_ON_ONCE(err);
+
+ INIT_LIST_HEAD(&va->list);
}
static __always_inline void
@@ -1591,23 +1399,27 @@ insert_vmap_area_lazy_locked(struct vmap_area *va, struct vmap_node *vn)
lockdep_assert_held(&vn->lazy.lock);
try_init_lazy_mt_locked(vn);
+ if (WARN_ON_ONCE(!vn->lazy.mt_enabled))
+ return;
- if (likely(vn->lazy.mt_enabled)) {
- MA_STATE(mas, &vn->lazy.mt, va->va_start, va->va_end - 1);
+ if (!validate_vmap_area_range_insert_mt_locked(&vn->lazy.mt,
+ va->va_start,
+ va->va_end))
+ return;
- if (!insert_vmap_area_list_sorted_mt(va, &vn->lazy.mt,
- &vn->lazy.head))
- return;
+ INIT_LIST_HEAD(&va->list);
- err = mas_store_gfp(&mas, va, GFP_ATOMIC | __GFP_NOWARN);
- if (WARN_ON_ONCE(err))
- disable_lazy_mt_locked(vn);
+ MA_STATE(mas, &vn->lazy.mt, va->va_start, va->va_end - 1);
+ err = mas_preallocate(&mas, va, GFP_NOWAIT | __GFP_NOWARN);
+ if (!err) {
+ mas_store_prealloc(&mas, va);
+ mas_destroy(&mas);
return;
}
- if (!insert_vmap_area_list_sorted(va, &vn->lazy.head))
- return;
+ err = mas_store_gfp(&mas, va, GFP_ATOMIC | __GFP_NOWARN);
+ WARN_ON_ONCE(err);
}
static __always_inline bool
@@ -1616,60 +1428,56 @@ lazy_vmap_areas_empty_locked(struct vmap_node *vn)
lockdep_assert_held(&vn->lazy.lock);
try_init_lazy_mt_locked(vn);
+ if (WARN_ON_ONCE(!vn->lazy.mt_enabled))
+ return true;
- if (likely(vn->lazy.mt_enabled))
- return mtree_empty(&vn->lazy.mt);
-
- return list_empty(&vn->lazy.head);
+ return mtree_empty(&vn->lazy.mt);
}
static __always_inline void
move_lazy_vmap_areas_to_purge_locked(struct vmap_node *vn)
{
struct vmap_area *va;
- int err;
lockdep_assert_held(&vn->lazy.lock);
try_init_lazy_mt_locked(vn);
+ if (WARN_ON_ONCE(!vn->lazy.mt_enabled))
+ return;
- if (likely(vn->lazy.mt_enabled)) {
- list_for_each_entry(va, &vn->lazy.head, list) {
- MA_STATE(mas, &vn->lazy.mt, va->va_start, va->va_end - 1);
-
- err = mas_store_gfp(&mas, NULL, GFP_ATOMIC | __GFP_NOWARN);
- if (WARN_ON_ONCE(err)) {
- disable_lazy_mt_locked(vn);
- break;
- }
- }
+ MA_STATE(mas, &vn->lazy.mt, 0, 0);
- if (vn->lazy.mt_enabled && WARN_ON_ONCE(!mtree_empty(&vn->lazy.mt)))
- disable_lazy_mt_locked(vn);
- }
+ mas_for_each(&mas, va, ULONG_MAX)
+ list_add_tail(&va->list, &vn->purge_list);
- list_replace_init(&vn->lazy.head, &vn->purge_list);
+ __mt_destroy(&vn->lazy.mt);
+ mt_init_flags(&vn->lazy.mt, MT_FLAGS_LOCK_EXTERN);
+ mt_set_external_lock(&vn->lazy.mt, &vn->lazy.lock);
+ vn->lazy.mt_enabled = true;
}
static __always_inline bool
insert_vmap_area_free_locked(struct vmap_area *va)
{
+ struct vmap_area *prev, *next;
+
lockdep_assert_held(&free_vmap_area_lock);
try_init_free_mt_locked();
- if (likely(free_mt_supported())) {
- if (!insert_vmap_area_list_sorted_mt(va, &free_vmap_area_mt,
- &free_vmap_area_list))
- return false;
+ if (unlikely(!free_mt_supported()))
+ return false;
- free_mt_store_va_locked(va);
- } else {
- if (!insert_vmap_area_list_sorted(va, &free_vmap_area_list))
- return false;
- }
+ prev = __find_vmap_area_enclose_addr_mt(va->va_start, &free_vmap_area_mt);
+ if (prev && WARN_ON_ONCE(prev->va_end > va->va_start))
+ return false;
- return true;
+ next = __find_vmap_area_exceed_addr_mt(va->va_start, &free_vmap_area_mt);
+ if (next && WARN_ON_ONCE(next->va_start < va->va_end))
+ return false;
+
+ INIT_LIST_HEAD(&va->list);
+ return free_mt_store_va_locked(va);
}
static __always_inline void
@@ -1677,193 +1485,56 @@ unlink_vmap_area_free_locked(struct vmap_area *va)
{
lockdep_assert_held(&free_vmap_area_lock);
- if (WARN_ON_ONCE(list_empty(&va->list)))
+ if (unlikely(!free_mt_supported()))
return;
- if (likely(free_mt_supported()))
- free_mt_erase_va_locked(va);
-
- list_del_init(&va->list);
-}
-
-/*
- * Merge de-allocated chunk of VA memory with previous
- * and next free blocks. If coalesce is not done a new
- * free area is inserted. If VA has been merged, it is
- * freed.
- *
- * Please note, it can return NULL in case of overlap
- * ranges, followed by WARN() report. Despite it is a
- * buggy behaviour, a system can be alive and keep
- * ongoing.
- */
-static __always_inline struct vmap_area *
-__merge_or_add_vmap_area(struct vmap_area *va, struct list_head *head, bool update_mt)
-{
- struct vmap_area *sibling;
- struct list_head *next;
- unsigned long old_start, old_end;
- bool merged = false;
-
- if (update_mt && free_mt_supported())
- next = find_vmap_area_insert_point_mt(va, &free_vmap_area_mt, head);
- else
- next = find_vmap_area_insert_point_list(va, head);
-
- if (!next)
- return NULL;
-
- /*
- * start end
- * | |
- * |<------VA------>|<-----Next----->|
- * | |
- * start end
- */
- if (next != head) {
- sibling = list_entry(next, struct vmap_area, list);
- if (sibling->va_start == va->va_end) {
- old_start = sibling->va_start;
- old_end = sibling->va_end;
- sibling->va_start = va->va_start;
- if (update_mt && free_mt_supported())
- free_mt_update_va_locked(sibling, old_start, old_end);
-
- /* Free vmap_area object. */
- kmem_cache_free(vmap_area_cachep, va);
-
- /* Point to the new merged area. */
- va = sibling;
- merged = true;
- }
- }
-
- /*
- * start end
- * | |
- * |<-----Prev----->|<------VA------>|
- * | |
- * start end
- */
- if (next->prev != head) {
- sibling = list_entry(next->prev, struct vmap_area, list);
- if (sibling->va_end == va->va_start) {
- /*
- * If both neighbors are coalesced, it is important
- * to unlink the "next" node first, followed by merging
- * with "previous" one.
- */
- if (merged) {
- if (update_mt)
- unlink_vmap_area_free_locked(va);
- else
- list_del_init(&va->list);
- }
-
- old_start = sibling->va_start;
- old_end = sibling->va_end;
- sibling->va_end = va->va_end;
- if (update_mt && free_mt_supported())
- free_mt_update_va_locked(sibling, old_start, old_end);
-
- /* Free vmap_area object. */
- kmem_cache_free(vmap_area_cachep, va);
-
- /* Point to the new merged area. */
- va = sibling;
- merged = true;
- }
- }
-
- if (!merged) {
- if (update_mt)
- insert_vmap_area_free_locked(va);
- else
- list_add_tail(&va->list, next);
- }
+ if (!free_mt_erase_va_locked(va))
+ return;
- return va;
-}
-
-static __always_inline struct vmap_area *
-merge_or_add_vmap_area(struct vmap_area *va,
- struct list_head *head)
-{
- return __merge_or_add_vmap_area(va, head, false);
+ INIT_LIST_HEAD(&va->list);
}
static __always_inline struct vmap_area *
merge_or_add_vmap_area_free_locked(struct vmap_area *va)
{
+ struct vmap_area *left, *right;
+ unsigned long new_start, new_end;
+
lockdep_assert_held(&free_vmap_area_lock);
- va = __merge_or_add_vmap_area(va, &free_vmap_area_list, true);
- if (va && va->va_start < free_vmap_alloc_hint)
- free_vmap_alloc_hint = va->va_start;
+ if (unlikely(!free_mt_supported()))
+ return NULL;
- return va;
-}
+ new_start = va->va_start;
+ new_end = va->va_end;
-/*
- * Transitional wrappers retained until all legacy rb call sites are switched.
- * Follow-up patches in this series remove these wrappers.
- */
-static __always_inline void
-insert_vmap_area(struct vmap_area *va, struct rb_root *root,
- struct list_head *head)
-{
- struct vmap_node *vn = addr_to_node(va->va_start);
+ left = __find_vmap_area_enclose_addr_mt(new_start, &free_vmap_area_mt);
+ if (left && WARN_ON_ONCE(left->va_end > new_start))
+ return NULL;
- if (head == &free_vmap_area_list) {
- insert_vmap_area_free_locked(va);
- return;
+ if (left && left->va_end == new_start) {
+ new_start = left->va_start;
+ unlink_vmap_area_free_locked(left);
+ kmem_cache_free(vmap_area_cachep, left);
}
- if (head == &vn->lazy.head) {
- insert_vmap_area_lazy_locked(va, vn);
- return;
- }
-
- insert_vmap_area_busy_locked(va, vn);
-}
-
-static __always_inline void
-insert_vmap_area_augment(struct vmap_area *va, struct rb_node *from,
- struct rb_root *root, struct list_head *head)
-{
- insert_vmap_area(va, root, head);
-}
-
-static __always_inline void unlink_va(struct vmap_area *va, struct rb_root *root)
-{
- struct vmap_node *vn = addr_to_node(va->va_start);
+ right = __find_vmap_area_exceed_addr_mt(new_start, &free_vmap_area_mt);
+ if (right && WARN_ON_ONCE(right->va_start < new_end))
+ return NULL;
- if (root == &free_vmap_area_root) {
- unlink_vmap_area_free_locked(va);
- return;
+ if (right && right->va_start == new_end) {
+ new_end = right->va_end;
+ unlink_vmap_area_free_locked(right);
+ kmem_cache_free(vmap_area_cachep, right);
}
- unlink_vmap_area_busy_locked(va, vn);
-}
+ va->va_start = new_start;
+ va->va_end = new_end;
-static __always_inline void
-unlink_va_augment(struct vmap_area *va, struct rb_root *root)
-{
- unlink_va(va, root);
-}
-
-static __always_inline void augment_tree_propagate_from(struct vmap_area *va)
-{
-}
-
-static __always_inline struct vmap_area *
-merge_or_add_vmap_area_augment(struct vmap_area *va, struct rb_root *root,
- struct list_head *head)
-{
- if (head == &free_vmap_area_list)
- return merge_or_add_vmap_area_free_locked(va);
+ if (!insert_vmap_area_free_locked(va))
+ return NULL;
- return merge_or_add_vmap_area(va, head);
+ return va;
}
static __always_inline bool
@@ -1885,86 +1556,57 @@ is_within_this_va(struct vmap_area *va, unsigned long size,
return (nva_start_addr + size <= va->va_end);
}
-static __always_inline struct vmap_area *
-find_vmap_lowest_match_list(struct list_head *head, unsigned long size,
- unsigned long align, unsigned long vstart)
+static __always_inline bool
+occupied_mt_find_hole_window_locked(unsigned long min, unsigned long max,
+ unsigned long size, unsigned long align,
+ unsigned long *addr)
{
- struct vmap_area *va;
+ MA_STATE(mas, &occupied_vmap_area_mt, 0, 0);
+ unsigned long search = min;
+ unsigned long hole_end;
- list_for_each_entry(va, head, list) {
- if (!is_within_this_va(va, size, align, vstart))
- continue;
+ while (search <= max) {
+ unsigned long candidate, candidate_end;
- return va;
- }
+ mas_set(&mas, search);
+ if (mas_empty_area(&mas, search, max, size))
+ return false;
- return NULL;
-}
+ hole_end = min(mas.last, max);
+ candidate = ALIGN(mas.index, align);
+ if (candidate < mas.index)
+ return false;
-static __always_inline unsigned long
-clamp_vmap_alloc_hint(unsigned long hint, unsigned long vstart,
- unsigned long vend)
-{
- if (hint < vstart || hint >= vend)
- return vstart;
+ if (check_add_overflow(candidate, size - 1, &candidate_end))
+ return false;
- return hint;
-}
+ if (candidate >= search && candidate_end <= hole_end) {
+ *addr = candidate;
+ return true;
+ }
-/*
- * Next-fit scan with wrap-around. Use maple to jump to the first candidate
- * around the hint in O(log n), then continue on the ordered list for cheap
- * neighbour traversal and deterministic coalescing behaviour.
- */
-static __always_inline struct vmap_area *
-find_vmap_match_list_next_fit(struct list_head *head, unsigned long size,
- unsigned long align, unsigned long vstart,
- unsigned long vend)
-{
- struct vmap_area *va, *start = NULL;
- unsigned long hint;
- bool wrapped;
-
- hint = clamp_vmap_alloc_hint(free_vmap_alloc_hint, vstart, vend);
-
- if (hint != vstart) {
- if (free_mt_supported())
- start = __find_vmap_area_exceed_addr_mt(hint,
- &free_vmap_area_mt);
-
- if (start) {
- va = start;
- list_for_each_entry_from(va, head, list) {
- if (is_within_this_va(va, size, align, hint))
- return va;
- }
- } else {
- list_for_each_entry(va, head, list) {
- if (va->va_end <= hint)
- continue;
+ if (hole_end == ULONG_MAX)
+ return false;
- if (is_within_this_va(va, size, align, hint))
- return va;
- }
- }
+ search = hole_end + 1;
}
- wrapped = (hint != vstart);
- list_for_each_entry(va, head, list) {
- if (wrapped) {
- if (start && va == start)
- break;
- if (!start && va->va_start >= hint)
- break;
- }
+ return false;
+}
- if (is_within_this_va(va, size, align, vstart))
- return va;
- }
+static __always_inline unsigned long
+occupied_mt_find_hole_lowest_locked(unsigned long size, unsigned long align,
+ unsigned long vstart, unsigned long vend)
+{
+ unsigned long addr;
- return NULL;
+ if (occupied_mt_find_hole_window_locked(vstart, vend - 1, size, align, &addr))
+ return addr;
+
+ return -ENOENT;
}
+/* Lowest-match scan directly on maple ordered traversal. */
static __always_inline struct vmap_area *
find_vmap_lowest_match_mt(struct maple_tree *tree, unsigned long size,
unsigned long align, unsigned long vstart)
@@ -1989,24 +1631,26 @@ find_vmap_lowest_match_mt(struct maple_tree *tree, unsigned long size,
#include <linux/random.h>
static struct vmap_area *
-find_vmap_lowest_linear_match(struct list_head *head, unsigned long size,
- unsigned long align, unsigned long vstart)
+find_vmap_lowest_linear_match(struct maple_tree *tree, unsigned long size,
+ unsigned long align, unsigned long vstart)
{
+ MA_STATE(mas, tree, vstart, vstart);
struct vmap_area *va;
- list_for_each_entry(va, head, list) {
+ mas_set(&mas, vstart);
+ va = mas_find(&mas, ULONG_MAX);
+ while (va) {
if (!is_within_this_va(va, size, align, vstart))
- continue;
-
- return va;
+ va = mas_next(&mas, ULONG_MAX);
+ else
+ return va;
}
- return NULL;
+ return va;
}
static void
-find_vmap_lowest_match_check(struct list_head *head, unsigned long size,
- unsigned long align)
+find_vmap_lowest_match_check(unsigned long size, unsigned long align)
{
struct vmap_area *va_1, *va_2;
unsigned long vstart;
@@ -2015,11 +1659,8 @@ find_vmap_lowest_match_check(struct list_head *head, unsigned long size,
get_random_bytes(&rnd, sizeof(rnd));
vstart = VMALLOC_START + rnd;
- if (free_mt_supported())
- va_1 = find_vmap_lowest_match_mt(&free_vmap_area_mt, size, align, vstart);
- else
- va_1 = find_vmap_lowest_linear_match(head, size, align, vstart);
- va_2 = find_vmap_lowest_linear_match(head, size, align, vstart);
+ va_1 = find_vmap_lowest_match_mt(&free_vmap_area_mt, size, align, vstart);
+ va_2 = find_vmap_lowest_linear_match(&free_vmap_area_mt, size, align, vstart);
if (va_1 != va_2)
pr_emerg("not lowest: t: 0x%p, l: 0x%p, v: 0x%lx\n",
@@ -2153,39 +1794,38 @@ va_clip(struct vmap_area *va, unsigned long nva_start_addr,
}
if (type != FL_FIT_TYPE) {
- if (free_mt_supported())
- free_mt_update_va_locked(va, old_start, old_end);
+ if (free_mt_supported() &&
+ !free_mt_update_va_locked(va, old_start, old_end))
+ return -ENOMEM;
- if (lva) /* type == NE_FIT_TYPE */
- insert_vmap_area_free_locked(lva);
+ if (lva && !insert_vmap_area_free_locked(lva)) {
+ kmem_cache_free(vmap_area_cachep, lva);
+ return -ENOMEM;
+ }
}
return 0;
}
-static unsigned long
-va_alloc(struct vmap_area *va,
- unsigned long size, unsigned long align,
- unsigned long vstart, unsigned long vend)
+static __always_inline bool
+restore_allocated_vmap_range_free_locked(unsigned long start, unsigned long end)
{
- unsigned long nva_start_addr;
- int ret;
+ struct vmap_area *va;
- if (va->va_start > vstart)
- nva_start_addr = ALIGN(va->va_start, align);
- else
- nva_start_addr = ALIGN(vstart, align);
+ lockdep_assert_held(&free_vmap_area_lock);
- /* Check the "vend" restriction. */
- if (nva_start_addr + size > vend)
- return -ERANGE;
+ va = kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT);
+ if (!va)
+ return false;
- /* Update the free vmap_area. */
- ret = va_clip(va, nva_start_addr, size);
- if (WARN_ON_ONCE(ret))
- return ret;
+ va->va_start = start;
+ va->va_end = end;
+ if (!insert_vmap_area_free_locked(va)) {
+ kmem_cache_free(vmap_area_cachep, va);
+ return false;
+ }
- return nva_start_addr;
+ return true;
}
/*
@@ -2196,34 +1836,42 @@ static __always_inline unsigned long
__alloc_vmap_area(unsigned long size, unsigned long align,
unsigned long vstart, unsigned long vend)
{
+ int ret;
unsigned long nva_start_addr;
+ unsigned long nva_end_addr;
struct vmap_area *va;
lockdep_assert_held(&free_vmap_area_lock);
- /*
- * Next-fit with wrap-around lowers repeated list-head scans in
- * high-churn workloads.
- */
- va = find_vmap_match_list_next_fit(&free_vmap_area_list, size, align,
- vstart, vend);
+ try_init_occupied_mt_locked();
- if (unlikely(!va))
+ if (WARN_ON_ONCE(!occupied_mt_supported()))
return -ENOENT;
- nva_start_addr = va_alloc(va, size, align, vstart, vend);
- if (!IS_ERR_VALUE(nva_start_addr)) {
- unsigned long next_hint;
+ nva_start_addr = occupied_mt_find_hole_lowest_locked(size, align,
+ vstart, vend);
+ if (IS_ERR_VALUE(nva_start_addr))
+ return nva_start_addr;
+ nva_end_addr = nva_start_addr + size;
- if (check_add_overflow(nva_start_addr, size, &next_hint))
- free_vmap_alloc_hint = vstart;
- else
- free_vmap_alloc_hint = next_hint;
+ va = __find_vmap_area_mt(nva_start_addr, &free_vmap_area_mt);
+ if (WARN_ON_ONCE(!va))
+ return -ENOENT;
+
+ ret = va_clip(va, nva_start_addr, size);
+ if (WARN_ON_ONCE(ret))
+ return ret;
+
+ if (!occupied_mt_store_range_locked(nva_start_addr, nva_end_addr)) {
+ bool restored;
+
+ restored = restore_allocated_vmap_range_free_locked(nva_start_addr, nva_end_addr);
+ WARN_ON_ONCE(!restored);
+ return -ENOMEM;
}
#if DEBUG_AUGMENT_LOWEST_MATCH_CHECK
- if (!IS_ERR_VALUE(nva_start_addr))
- find_vmap_lowest_match_check(&free_vmap_area_list, size, align);
+ find_vmap_lowest_match_check(size, align);
#endif
return nva_start_addr;
@@ -2247,7 +1895,8 @@ static void free_vmap_area(struct vmap_area *va)
* Insert/Merge it back to the free tree/list.
*/
spin_lock(&free_vmap_area_lock);
- merge_or_add_vmap_area_free_locked(va);
+ WARN_ON_ONCE(!occupied_mt_erase_va_locked(va));
+ WARN_ON_ONCE(!merge_or_add_vmap_area_free_locked(va));
spin_unlock(&free_vmap_area_lock);
}
@@ -2566,24 +2215,36 @@ static DEFINE_MUTEX(vmap_purge_lock);
/* for per-CPU blocks */
static void purge_fragmented_blocks_allcpus(void);
-static void
-reclaim_list_global(struct list_head *head)
+static bool
+reclaim_list_global(struct list_head *head, bool erase_occupied,
+ struct list_head *failed)
{
struct vmap_area *va, *n;
+ bool ok = true;
if (list_empty(head))
- return;
+ return true;
spin_lock(&free_vmap_area_lock);
- list_for_each_entry_safe(va, n, head, list)
- merge_or_add_vmap_area_free_locked(va);
+ list_for_each_entry_safe(va, n, head, list) {
+ list_del_init(&va->list);
+ if (erase_occupied)
+ WARN_ON_ONCE(!occupied_mt_erase_va_locked(va));
+ if (WARN_ON_ONCE(!merge_or_add_vmap_area_free_locked(va))) {
+ list_add_tail(&va->list, failed);
+ ok = false;
+ }
+ }
spin_unlock(&free_vmap_area_lock);
+
+ return ok;
}
static void
decay_va_pool_node(struct vmap_node *vn, bool full_decay)
{
LIST_HEAD(decay_list);
+ LIST_HEAD(decay_failed);
struct vmap_area *va, *nva;
unsigned long n_decay, pool_len;
int i;
@@ -2612,7 +2273,7 @@ decay_va_pool_node(struct vmap_node *vn, bool full_decay)
break;
list_del_init(&va->list);
- merge_or_add_vmap_area(va, &decay_list);
+ list_add_tail(&va->list, &decay_list);
}
/*
@@ -2629,7 +2290,11 @@ decay_va_pool_node(struct vmap_node *vn, bool full_decay)
}
}
- reclaim_list_global(&decay_list);
+ WARN_ON_ONCE(!reclaim_list_global(&decay_list, false, &decay_failed));
+ list_for_each_entry_safe(va, nva, &decay_failed, list) {
+ list_del_init(&va->list);
+ WARN_ON_ONCE(!node_pool_add_va(vn, va));
+ }
}
#define KASAN_RELEASE_BATCH_SIZE 32
@@ -2664,8 +2329,10 @@ static void purge_vmap_node(struct work_struct *work)
struct vmap_node *vn = container_of(work,
struct vmap_node, purge_work);
unsigned long nr_purged_pages = 0;
+ unsigned long nr_failed_pages = 0;
struct vmap_area *va, *n_va;
LIST_HEAD(local_list);
+ LIST_HEAD(local_failed);
if (IS_ENABLED(CONFIG_KASAN_VMALLOC))
kasan_release_vmalloc_node(vn);
@@ -2691,7 +2358,23 @@ static void purge_vmap_node(struct work_struct *work)
atomic_long_sub(nr_purged_pages, &vmap_lazy_nr);
- reclaim_list_global(&local_list);
+ WARN_ON_ONCE(!reclaim_list_global(&local_list, false, &local_failed));
+ list_for_each_entry_safe(va, n_va, &local_failed, list) {
+ unsigned int vn_id = decode_vn_id(va->flags);
+ struct vmap_node *dst;
+
+ list_del_init(&va->list);
+ dst = is_vn_id_valid(vn_id) ?
+ id_to_node(vn_id) : addr_to_node(va->va_start);
+
+ spin_lock(&dst->lazy.lock);
+ insert_vmap_area_lazy_locked(va, dst);
+ spin_unlock(&dst->lazy.lock);
+ nr_failed_pages += va_size(va) >> PAGE_SHIFT;
+ }
+
+ if (nr_failed_pages)
+ atomic_long_add(nr_failed_pages, &vmap_lazy_nr);
}
/*
@@ -2823,6 +2506,15 @@ static void free_vmap_area_noflush(struct vmap_area *va)
vn = is_vn_id_valid(vn_id) ?
id_to_node(vn_id):addr_to_node(va->va_start);
+ /*
+ * Drop occupied-range visibility as soon as the area is freed, even
+ * though coalescing/reinsertion into the free index remains deferred.
+ */
+ spin_lock(&free_vmap_area_lock);
+ try_init_occupied_mt_locked();
+ WARN_ON_ONCE(!occupied_mt_erase_va_locked(va));
+ spin_unlock(&free_vmap_area_lock);
+
spin_lock(&vn->lazy.lock);
insert_vmap_area_lazy_locked(va, vn);
spin_unlock(&vn->lazy.lock);
@@ -5240,14 +4932,11 @@ EXPORT_SYMBOL_GPL(free_vm_area);
#ifdef CONFIG_SMP
static __always_inline struct vmap_area *
-free_vmap_area_prev(struct vmap_area *va)
+free_vmap_area_prev_by_addr(unsigned long addr)
{
lockdep_assert_held(&free_vmap_area_lock);
- if (va->list.prev == &free_vmap_area_list)
- return NULL;
-
- return list_entry(va->list.prev, struct vmap_area, list);
+ return __find_vmap_area_enclose_addr_mt(addr, &free_vmap_area_mt);
}
/**
@@ -5262,19 +4951,9 @@ free_vmap_area_prev(struct vmap_area *va)
static struct vmap_area *
pvm_find_va_enclose_addr(unsigned long addr)
{
- struct vmap_area *va;
-
lockdep_assert_held(&free_vmap_area_lock);
- if (free_mt_supported())
- return __find_vmap_area_enclose_addr_mt(addr, &free_vmap_area_mt);
-
- list_for_each_entry_reverse(va, &free_vmap_area_list, list) {
- if (va->va_start <= addr)
- return va;
- }
-
- return NULL;
+ return __find_vmap_area_enclose_addr_mt(addr, &free_vmap_area_mt);
}
/**
@@ -5293,13 +4972,19 @@ pvm_determine_end_from_reverse(struct vmap_area **va, unsigned long align)
unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
unsigned long addr;
+ lockdep_assert_held(&free_vmap_area_lock);
+
if (likely(*va)) {
- list_for_each_entry_from_reverse((*va),
- &free_vmap_area_list, list) {
+ do {
addr = min((*va)->va_end & ~(align - 1), vmalloc_end);
if ((*va)->va_start < addr)
return addr;
- }
+
+ if ((*va)->va_start == 0)
+ break;
+
+ *va = free_vmap_area_prev_by_addr((*va)->va_start - 1);
+ } while (*va);
}
return 0;
@@ -5382,6 +5067,8 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
}
retry:
spin_lock(&free_vmap_area_lock);
+ try_init_free_mt_locked();
+ try_init_occupied_mt_locked();
/* start scanning - we scan from the top, begin with the last area */
area = term_area = last_area;
@@ -5419,7 +5106,10 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
* If this VA does not fit, move base downwards and recheck.
*/
if (base + start < va->va_start) {
- va = free_vmap_area_prev(va);
+ if (va->va_start == 0)
+ va = NULL;
+ else
+ va = free_vmap_area_prev_by_addr(va->va_start - 1);
base = pvm_determine_end_from_reverse(&va, align) - end;
term_area = area;
continue;
@@ -5459,6 +5149,12 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
va = vas[area];
va->va_start = start;
va->va_end = start + size;
+
+ if (occupied_mt_supported() &&
+ !occupied_mt_store_range_locked(va->va_start, va->va_end)) {
+ area++;
+ goto recovery;
+ }
}
spin_unlock(&free_vmap_area_lock);
@@ -5501,11 +5197,14 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
while (area--) {
orig_start = vas[area]->va_start;
orig_end = vas[area]->va_end;
+ WARN_ON_ONCE(!occupied_mt_erase_va_locked(vas[area]));
va = merge_or_add_vmap_area_free_locked(vas[area]);
+ WARN_ON_ONCE(!va);
if (va)
kasan_release_vmalloc(orig_start, orig_end,
- va->va_start, va->va_end,
- KASAN_VMALLOC_PAGE_RANGE | KASAN_VMALLOC_TLB_FLUSH);
+ va->va_start, va->va_end,
+ KASAN_VMALLOC_PAGE_RANGE |
+ KASAN_VMALLOC_TLB_FLUSH);
vas[area] = NULL;
}
@@ -5551,7 +5250,9 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
for (area = 0; area < nr_vms; area++) {
orig_start = vas[area]->va_start;
orig_end = vas[area]->va_end;
+ WARN_ON_ONCE(!occupied_mt_erase_va_locked(vas[area]));
va = merge_or_add_vmap_area_free_locked(vas[area]);
+ WARN_ON_ONCE(!va);
if (va)
kasan_release_vmalloc(orig_start, orig_end,
va->va_start, va->va_end,
@@ -5598,7 +5299,7 @@ bool vmalloc_dump_obj(void *object)
if (!spin_trylock(&vn->busy.lock))
return false;
- va = __find_vmap_area(addr, &vn->busy.root);
+ va = find_vmap_area_busy_locked(addr, vn);
if (!va || !va->vm) {
spin_unlock(&vn->busy.lock);
return false;
@@ -5650,11 +5351,17 @@ static void show_purge_info(struct seq_file *m)
for_each_vmap_node(vn) {
spin_lock(&vn->lazy.lock);
- list_for_each_entry(va, &vn->lazy.head, list) {
- seq_printf(m, "0x%pK-0x%pK %7ld unpurged vm_area\n",
- (void *)va->va_start, (void *)va->va_end,
- va_size(va));
+ try_init_lazy_mt_locked(vn);
+ if (WARN_ON_ONCE(!vn->lazy.mt_enabled)) {
+ spin_unlock(&vn->lazy.lock);
+ continue;
}
+ MA_STATE(mas, &vn->lazy.mt, 0, 0);
+
+ mas_for_each(&mas, va, ULONG_MAX)
+ seq_printf(m, "0x%pK-0x%pK %7ld unpurged vm_area\n",
+ (void *)va->va_start, (void *)va->va_end,
+ va_size(va));
spin_unlock(&vn->lazy.lock);
}
}
@@ -5671,12 +5378,19 @@ static int vmalloc_info_show(struct seq_file *m, void *p)
for_each_vmap_node(vn) {
spin_lock(&vn->busy.lock);
- list_for_each_entry(va, &vn->busy.head, list) {
+ try_init_busy_mt_locked(vn);
+ if (WARN_ON_ONCE(!vn->busy.mt_enabled)) {
+ spin_unlock(&vn->busy.lock);
+ continue;
+ }
+ MA_STATE(mas, &vn->busy.mt, 0, 0);
+
+ mas_for_each(&mas, va, ULONG_MAX) {
if (!va->vm) {
if (va->flags & VMAP_RAM)
seq_printf(m, "0x%pK-0x%pK %7ld vm_map_ram\n",
- (void *)va->va_start, (void *)va->va_end,
- va_size(va));
+ (void *)va->va_start, (void *)va->va_end,
+ va_size(va));
continue;
}
@@ -5689,7 +5403,7 @@ static int vmalloc_info_show(struct seq_file *m, void *p)
smp_rmb();
seq_printf(m, "0x%pK-0x%pK %7ld",
- v->addr, v->addr + v->size, v->size);
+ v->addr, v->addr + v->size, v->size);
if (v->caller)
seq_printf(m, " %pS", v->caller);
@@ -5754,6 +5468,8 @@ static void __init vmap_init_free_space(void)
struct vmap_area *free;
struct vm_struct *busy;
+ spin_lock(&free_vmap_area_lock);
+
/*
* B F B B B F
* -|-----|.....|-----|-----|-----|.....|-
@@ -5761,19 +5477,18 @@ static void __init vmap_init_free_space(void)
* |<--------------------------------->|
*/
for (busy = vmlist; busy; busy = busy->next) {
- if ((unsigned long) busy->addr - vmap_start > 0) {
+ if ((unsigned long)busy->addr - vmap_start > 0) {
free = kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT);
if (!WARN_ON_ONCE(!free)) {
free->va_start = vmap_start;
- free->va_end = (unsigned long) busy->addr;
+ free->va_end = (unsigned long)busy->addr;
- insert_vmap_area_augment(free, NULL,
- &free_vmap_area_root,
- &free_vmap_area_list);
+ if (WARN_ON_ONCE(!insert_vmap_area_free_locked(free)))
+ kmem_cache_free(vmap_area_cachep, free);
}
}
- vmap_start = (unsigned long) busy->addr + busy->size;
+ vmap_start = (unsigned long)busy->addr + busy->size;
}
if (vmap_end - vmap_start > 0) {
@@ -5782,11 +5497,12 @@ static void __init vmap_init_free_space(void)
free->va_start = vmap_start;
free->va_end = vmap_end;
- insert_vmap_area_augment(free, NULL,
- &free_vmap_area_root,
- &free_vmap_area_list);
+ if (WARN_ON_ONCE(!insert_vmap_area_free_locked(free)))
+ kmem_cache_free(vmap_area_cachep, free);
}
}
+
+ spin_unlock(&free_vmap_area_lock);
}
static void vmap_init_nodes(void)
@@ -5825,13 +5541,15 @@ static void vmap_init_nodes(void)
#endif
for_each_vmap_node(vn) {
- vn->busy.root = RB_ROOT;
- INIT_LIST_HEAD(&vn->busy.head);
spin_lock_init(&vn->busy.lock);
+ mt_init_flags(&vn->busy.mt, MT_FLAGS_LOCK_EXTERN);
+ mt_set_external_lock(&vn->busy.mt, &vn->busy.lock);
+ vn->busy.mt_enabled = true;
- vn->lazy.root = RB_ROOT;
- INIT_LIST_HEAD(&vn->lazy.head);
spin_lock_init(&vn->lazy.lock);
+ mt_init_flags(&vn->lazy.mt, MT_FLAGS_LOCK_EXTERN);
+ mt_set_external_lock(&vn->lazy.mt, &vn->lazy.lock);
+ vn->lazy.mt_enabled = true;
for (i = 0; i < MAX_VA_SIZE_PAGES; i++) {
INIT_LIST_HEAD(&vn->pool[i].head);
@@ -5881,6 +5599,11 @@ void __init vmalloc_init(void)
* Create the cache for vmap_area objects.
*/
vmap_area_cachep = KMEM_CACHE(vmap_area, SLAB_PANIC);
+ /*
+ * vmalloc_init() performs Maple stores/preallocation while importing
+ * early ranges. Ensure Maple node cache is available at this stage.
+ */
+ maple_tree_init();
for_each_possible_cpu(i) {
struct vmap_block_queue *vbq;
@@ -5911,7 +5634,15 @@ void __init vmalloc_init(void)
va->vm = tmp;
vn = addr_to_node(va->va_start);
- insert_vmap_area(va, &vn->busy.root, &vn->busy.head);
+ spin_lock(&vn->busy.lock);
+ insert_vmap_area_busy_locked(va, vn);
+ spin_unlock(&vn->busy.lock);
+
+ spin_lock(&free_vmap_area_lock);
+ try_init_occupied_mt_locked();
+ WARN_ON_ONCE(!occupied_mt_store_range_locked(va->va_start,
+ va->va_end));
+ spin_unlock(&free_vmap_area_lock);
}
/*
--
2.34.1
next prev parent reply other threads:[~2026-06-13 17:21 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-06-13 17:19 [PATCH RFC 00/12] mm/vmalloc: migrate vmap_area indexing from rb-tree to maple-tree Pranjal Arya
2026-06-13 17:19 ` [PATCH RFC 01/12] mm/vmalloc: introduce maple_tree-based indexing for vmap_area Pranjal Arya
2026-06-13 17:19 ` [PATCH RFC 02/12] mm/vmalloc: convert allocation-side gap finding and insertion to maple_tree Pranjal Arya
2026-06-13 17:19 ` [PATCH RFC 03/12] mm/vmalloc: convert free, purge, and pcpu paths " Pranjal Arya
2026-06-13 17:19 ` Pranjal Arya [this message]
2026-06-13 17:19 ` [PATCH RFC 05/12] mm/vmalloc: tighten failure handling under memory pressure Pranjal Arya
2026-06-13 17:19 ` [PATCH RFC 06/12] mm/vmalloc: tighten alloc/free hot paths Pranjal Arya
2026-06-13 17:19 ` [PATCH RFC 07/12] mm/vmalloc: consolidate occupied tree as authoritative index on hot path Pranjal Arya
2026-06-13 17:19 ` [PATCH RFC 08/12] mm/vmalloc: track lazy-purge queue as a list_head Pranjal Arya
2026-06-13 17:19 ` [PATCH RFC 09/12] mm/vmalloc: collapse busy-tree find-then-unlink into a single mas_erase Pranjal Arya
2026-06-13 17:19 ` [PATCH RFC 10/12] mm/vmalloc: per-CPU caching of free ranges from the maple_tree allocator Pranjal Arya
2026-06-13 17:19 ` [PATCH RFC 11/12] mm/vmalloc: O(1) lookup of cached vmap_areas with bounded fast-reject Pranjal Arya
2026-06-13 17:19 ` [PATCH RFC 12/12] mm/vmalloc: harden bump-allocator alloc/free against UBSAN array bounds Pranjal Arya
2026-06-13 23:15 ` [PATCH RFC 00/12] mm/vmalloc: migrate vmap_area indexing from rb-tree to maple-tree Matthew Wilcox
2026-06-14 6:35 ` [syzbot ci] " syzbot ci
2026-06-14 6:58 ` [PATCH RFC 00/12] " Uladzislau Rezki
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260613-vmalloc_maple-v1-4-0aa740bb944b@oss.qualcomm.com \
--to=pranjal.arya@oss.qualcomm.com \
--cc=Suzuki.Poulose@arm.com \
--cc=akpm@linux-foundation.org \
--cc=aliceryhl@google.com \
--cc=andrewjballance@gmail.com \
--cc=balbirs@nvidia.com \
--cc=dev.jain@arm.com \
--cc=dvyukov@google.com \
--cc=elver@google.com \
--cc=glider@google.com \
--cc=jackmanb@google.com \
--cc=liam@infradead.org \
--cc=linux-arm-msm@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=ljs@kernel.org \
--cc=maple-tree@lists.infradead.org \
--cc=neil.armstrong@linaro.org \
--cc=praan@google.com \
--cc=puranjay@kernel.org \
--cc=santosh.shukla@amd.com \
--cc=shuah@kernel.org \
--cc=smostafa@google.com \
--cc=sudeep.holla@kernel.org \
--cc=surenb@google.com \
--cc=urezki@gmail.com \
--cc=will@kernel.org \
--cc=wkarny@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox