Linux-mm Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: Pranjal Arya <pranjal.arya@oss.qualcomm.com>
To: Andrew Morton <akpm@linux-foundation.org>,
	Uladzislau Rezki <urezki@gmail.com>,
	"Liam R. Howlett" <liam@infradead.org>,
	Alice Ryhl <aliceryhl@google.com>,
	Andrew Ballance <andrewjballance@gmail.com>
Cc: linux-arm-msm@vger.kernel.org, linux-mm@kvack.org,
	linux-kernel@vger.kernel.org, maple-tree@lists.infradead.org,
	Lorenzo Stoakes <ljs@kernel.org>,
	Pranjal Shrivastava <praan@google.com>,
	Will Deacon <will@kernel.org>,
	Suzuki K Poulose <Suzuki.Poulose@arm.com>,
	Neil Armstrong <neil.armstrong@linaro.org>,
	Mostafa Saleh <smostafa@google.com>,
	Balbir Singh <balbirs@nvidia.com>,
	Suren Baghdasaryan <surenb@google.com>,
	Marco Elver <elver@google.com>,
	Dmitry Vyukov <dvyukov@google.com>,
	Alexander Potapenko <glider@google.com>,
	Shuah Khan <shuah@kernel.org>, Dev Jain <dev.jain@arm.com>,
	Brendan Jackman <jackmanb@google.com>,
	Puranjay Mohan <puranjay@kernel.org>,
	Santosh Shukla <santosh.shukla@amd.com>,
	Wyes Karny <wkarny@gmail.com>,
	Pranjal Arya <pranjal.arya@oss.qualcomm.com>,
	Sudeep Holla <sudeep.holla@kernel.org>
Subject: [PATCH RFC 02/12] mm/vmalloc: convert allocation-side gap finding and insertion to maple_tree
Date: Sat, 13 Jun 2026 22:49:44 +0530	[thread overview]
Message-ID: <20260613-vmalloc_maple-v1-2-0aa740bb944b@oss.qualcomm.com> (raw)
In-Reply-To: <20260613-vmalloc_maple-v1-0-0aa740bb944b@oss.qualcomm.com>

Switch __alloc_vmap_area, va_clip, classify_va_fit_type, and the
insertion / merge helpers to drive the global free-area state through
the maple_tree-backed helpers. The augmented rb_tree walk
(find_vmap_lowest_match / find_va_links / augment_tree_propagate_*)
becomes unreachable on the alloc path and is removed.

The alloc path retains a list-based next-fit walk over
free_vmap_area_list driven by free_vmap_alloc_hint, but its
insertion-point lookup, neighbour validation and free-area indexing
run through the maple_tree helpers
(find_vmap_area_insert_point_mt, free_mt_store_va_locked,
free_mt_update_va_locked).

va_clip handles the LE/RE/NE fit types via the new free_mt_*_locked
helpers.

The pcpu and free paths still drive the rb_tree.

Signed-off-by: Pranjal Arya <pranjal.arya@oss.qualcomm.com>
---
 mm/vmalloc.c | 653 +++++++++++++++++++++++++++++------------------------------
 1 file changed, 323 insertions(+), 330 deletions(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 67f753d04c96..c5f509f033e6 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1535,261 +1535,155 @@ find_vmap_area_exceed_addr_lock(unsigned long addr, struct vmap_area **va)
 	return NULL;
 }
 
-/*
- * This function returns back addresses of parent node
- * and its left or right link for further processing.
- *
- * Otherwise NULL is returned. In that case all further
- * steps regarding inserting of conflicting overlap range
- * have to be declined and actually considered as a bug.
- */
-static __always_inline struct rb_node **
-find_va_links(struct vmap_area *va,
-	struct rb_root *root, struct rb_node *from,
-	struct rb_node **parent)
-{
-	struct vmap_area *tmp_va;
-	struct rb_node **link;
-
-	if (root) {
-		link = &root->rb_node;
-		if (unlikely(!*link)) {
-			*parent = NULL;
-			return link;
-		}
-	} else {
-		link = &from;
-	}
+static __always_inline void
+insert_vmap_area_busy_locked(struct vmap_area *va, struct vmap_node *vn)
+{
+	int err;
 
-	/*
-	 * Go to the bottom of the tree. When we hit the last point
-	 * we end up with parent rb_node and correct direction, i name
-	 * it link, where the new va->rb_node will be attached to.
-	 */
-	do {
-		tmp_va = rb_entry(*link, struct vmap_area, rb_node);
+	lockdep_assert_held(&vn->busy.lock);
 
-		/*
-		 * During the traversal we also do some sanity check.
-		 * Trigger the BUG() if there are sides(left/right)
-		 * or full overlaps.
-		 */
-		if (va->va_end <= tmp_va->va_start)
-			link = &(*link)->rb_left;
-		else if (va->va_start >= tmp_va->va_end)
-			link = &(*link)->rb_right;
-		else {
-			WARN(1, "vmalloc bug: 0x%lx-0x%lx overlaps with 0x%lx-0x%lx\n",
-				va->va_start, va->va_end, tmp_va->va_start, tmp_va->va_end);
+	try_init_busy_mt_locked(vn);
 
-			return NULL;
-		}
-	} while (*link);
+	if (likely(vn->busy.mt_enabled)) {
+		MA_STATE(mas, &vn->busy.mt, va->va_start, va->va_end - 1);
 
-	*parent = &tmp_va->rb_node;
-	return link;
-}
+		if (!insert_vmap_area_list_sorted_mt(va, &vn->busy.mt,
+						     &vn->busy.head))
+			return;
 
-static __always_inline struct list_head *
-get_va_next_sibling(struct rb_node *parent, struct rb_node **link)
-{
-	struct list_head *list;
+		err = mas_store_gfp(&mas, va, GFP_ATOMIC | __GFP_NOWARN);
+		if (WARN_ON_ONCE(err))
+			disable_busy_mt_locked(vn);
 
-	if (unlikely(!parent))
-		/*
-		 * The red-black tree where we try to find VA neighbors
-		 * before merging or inserting is empty, i.e. it means
-		 * there is no free vmap space. Normally it does not
-		 * happen but we handle this case anyway.
-		 */
-		return NULL;
+		return;
+	}
 
-	list = &rb_entry(parent, struct vmap_area, rb_node)->list;
-	return (&parent->rb_right == link ? list->next : list);
+	if (!insert_vmap_area_list_sorted(va, &vn->busy.head))
+		return;
 }
 
 static __always_inline void
-__link_va(struct vmap_area *va, struct rb_root *root,
-	struct rb_node *parent, struct rb_node **link,
-	struct list_head *head, bool augment)
+unlink_vmap_area_busy_locked(struct vmap_area *va, struct vmap_node *vn)
 {
-	/*
-	 * VA is still not in the list, but we can
-	 * identify its future previous list_head node.
-	 */
-	if (likely(parent)) {
-		head = &rb_entry(parent, struct vmap_area, rb_node)->list;
-		if (&parent->rb_right != link)
-			head = head->prev;
-	}
+	int err;
 
-	/* Insert to the rb-tree */
-	rb_link_node(&va->rb_node, parent, link);
-	if (augment) {
-		/*
-		 * Some explanation here. Just perform simple insertion
-		 * to the tree. We do not set va->subtree_max_size to
-		 * its current size before calling rb_insert_augmented().
-		 * It is because we populate the tree from the bottom
-		 * to parent levels when the node _is_ in the tree.
-		 *
-		 * Therefore we set subtree_max_size to zero after insertion,
-		 * to let __augment_tree_propagate_from() puts everything to
-		 * the correct order later on.
-		 */
-		rb_insert_augmented(&va->rb_node,
-			root, &free_vmap_area_rb_augment_cb);
-		va->subtree_max_size = 0;
-	} else {
-		rb_insert_color(&va->rb_node, root);
-	}
+	lockdep_assert_held(&vn->busy.lock);
 
-	/* Address-sort this list */
-	list_add(&va->list, head);
-}
+	MA_STATE(mas, &vn->busy.mt, va->va_start, va->va_end - 1);
 
-static __always_inline void
-link_va(struct vmap_area *va, struct rb_root *root,
-	struct rb_node *parent, struct rb_node **link,
-	struct list_head *head)
-{
-	__link_va(va, root, parent, link, head, false);
-}
+	list_del_init(&va->list);
 
-static __always_inline void
-link_va_augment(struct vmap_area *va, struct rb_root *root,
-	struct rb_node *parent, struct rb_node **link,
-	struct list_head *head)
-{
-	__link_va(va, root, parent, link, head, true);
-}
+	try_init_busy_mt_locked(vn);
 
-static __always_inline void
-__unlink_va(struct vmap_area *va, struct rb_root *root, bool augment)
-{
-	if (WARN_ON(RB_EMPTY_NODE(&va->rb_node)))
+	if (unlikely(!vn->busy.mt_enabled))
 		return;
 
-	if (augment)
-		rb_erase_augmented(&va->rb_node,
-			root, &free_vmap_area_rb_augment_cb);
-	else
-		rb_erase(&va->rb_node, root);
-
-	list_del_init(&va->list);
-	RB_CLEAR_NODE(&va->rb_node);
+	err = mas_store_gfp(&mas, NULL, GFP_ATOMIC | __GFP_NOWARN);
+	if (WARN_ON_ONCE(err))
+		disable_busy_mt_locked(vn);
 }
 
 static __always_inline void
-unlink_va(struct vmap_area *va, struct rb_root *root)
+insert_vmap_area_lazy_locked(struct vmap_area *va, struct vmap_node *vn)
 {
-	__unlink_va(va, root, false);
-}
+	int err;
 
-static __always_inline void
-unlink_va_augment(struct vmap_area *va, struct rb_root *root)
-{
-	__unlink_va(va, root, true);
+	lockdep_assert_held(&vn->lazy.lock);
+
+	try_init_lazy_mt_locked(vn);
+
+	if (likely(vn->lazy.mt_enabled)) {
+		MA_STATE(mas, &vn->lazy.mt, va->va_start, va->va_end - 1);
+
+		if (!insert_vmap_area_list_sorted_mt(va, &vn->lazy.mt,
+						     &vn->lazy.head))
+			return;
+
+		err = mas_store_gfp(&mas, va, GFP_ATOMIC | __GFP_NOWARN);
+		if (WARN_ON_ONCE(err))
+			disable_lazy_mt_locked(vn);
+
+		return;
+	}
+
+	if (!insert_vmap_area_list_sorted(va, &vn->lazy.head))
+		return;
 }
 
-#if DEBUG_AUGMENT_PROPAGATE_CHECK
-/*
- * Gets called when remove the node and rotate.
- */
-static __always_inline unsigned long
-compute_subtree_max_size(struct vmap_area *va)
+static __always_inline bool
+lazy_vmap_areas_empty_locked(struct vmap_node *vn)
 {
-	return max3(va_size(va),
-		get_subtree_max_size(va->rb_node.rb_left),
-		get_subtree_max_size(va->rb_node.rb_right));
+	lockdep_assert_held(&vn->lazy.lock);
+
+	try_init_lazy_mt_locked(vn);
+
+	if (likely(vn->lazy.mt_enabled))
+		return mtree_empty(&vn->lazy.mt);
+
+	return list_empty(&vn->lazy.head);
 }
 
-static void
-augment_tree_propagate_check(void)
+static __always_inline void
+move_lazy_vmap_areas_to_purge_locked(struct vmap_node *vn)
 {
 	struct vmap_area *va;
-	unsigned long computed_size;
+	int err;
 
-	list_for_each_entry(va, &free_vmap_area_list, list) {
-		computed_size = compute_subtree_max_size(va);
-		if (computed_size != va->subtree_max_size)
-			pr_emerg("tree is corrupted: %lu, %lu\n",
-				va_size(va), va->subtree_max_size);
+	lockdep_assert_held(&vn->lazy.lock);
+
+	try_init_lazy_mt_locked(vn);
+
+	if (likely(vn->lazy.mt_enabled)) {
+		list_for_each_entry(va, &vn->lazy.head, list) {
+			MA_STATE(mas, &vn->lazy.mt, va->va_start, va->va_end - 1);
+
+			err = mas_store_gfp(&mas, NULL, GFP_ATOMIC | __GFP_NOWARN);
+			if (WARN_ON_ONCE(err)) {
+				disable_lazy_mt_locked(vn);
+				break;
+			}
+		}
+
+		if (vn->lazy.mt_enabled && WARN_ON_ONCE(!mtree_empty(&vn->lazy.mt)))
+			disable_lazy_mt_locked(vn);
 	}
+
+	list_replace_init(&vn->lazy.head, &vn->purge_list);
 }
-#endif
 
-/*
- * This function populates subtree_max_size from bottom to upper
- * levels starting from VA point. The propagation must be done
- * when VA size is modified by changing its va_start/va_end. Or
- * in case of newly inserting of VA to the tree.
- *
- * It means that __augment_tree_propagate_from() must be called:
- * - After VA has been inserted to the tree(free path);
- * - After VA has been shrunk(allocation path);
- * - After VA has been increased(merging path).
- *
- * Please note that, it does not mean that upper parent nodes
- * and their subtree_max_size are recalculated all the time up
- * to the root node.
- *
- *       4--8
- *        /\
- *       /  \
- *      /    \
- *    2--2  8--8
- *
- * For example if we modify the node 4, shrinking it to 2, then
- * no any modification is required. If we shrink the node 2 to 1
- * its subtree_max_size is updated only, and set to 1. If we shrink
- * the node 8 to 6, then its subtree_max_size is set to 6 and parent
- * node becomes 4--6.
- */
-static __always_inline void
-augment_tree_propagate_from(struct vmap_area *va)
+static __always_inline bool
+insert_vmap_area_free_locked(struct vmap_area *va)
 {
-	/*
-	 * Populate the tree from bottom towards the root until
-	 * the calculated maximum available size of checked node
-	 * is equal to its current one.
-	 */
-	free_vmap_area_rb_augment_cb_propagate(&va->rb_node, NULL);
+	lockdep_assert_held(&free_vmap_area_lock);
 
-#if DEBUG_AUGMENT_PROPAGATE_CHECK
-	augment_tree_propagate_check();
-#endif
-}
+	try_init_free_mt_locked();
 
-static void
-insert_vmap_area(struct vmap_area *va,
-	struct rb_root *root, struct list_head *head)
-{
-	struct rb_node **link;
-	struct rb_node *parent;
+	if (likely(free_mt_supported())) {
+		if (!insert_vmap_area_list_sorted_mt(va, &free_vmap_area_mt,
+						     &free_vmap_area_list))
+			return false;
 
-	link = find_va_links(va, root, NULL, &parent);
-	if (link)
-		link_va(va, root, parent, link, head);
+		free_mt_store_va_locked(va);
+	} else {
+		if (!insert_vmap_area_list_sorted(va, &free_vmap_area_list))
+			return false;
+	}
+
+	return true;
 }
 
-static void
-insert_vmap_area_augment(struct vmap_area *va,
-	struct rb_node *from, struct rb_root *root,
-	struct list_head *head)
+static __always_inline void
+unlink_vmap_area_free_locked(struct vmap_area *va)
 {
-	struct rb_node **link;
-	struct rb_node *parent;
+	lockdep_assert_held(&free_vmap_area_lock);
 
-	if (from)
-		link = find_va_links(va, NULL, from, &parent);
-	else
-		link = find_va_links(va, root, NULL, &parent);
+	if (WARN_ON_ONCE(list_empty(&va->list)))
+		return;
 
-	if (link) {
-		link_va_augment(va, root, parent, link, head);
-		augment_tree_propagate_from(va);
-	}
+	if (likely(free_mt_supported()))
+		free_mt_erase_va_locked(va);
+
+	list_del_init(&va->list);
 }
 
 /*
@@ -1804,29 +1698,20 @@ insert_vmap_area_augment(struct vmap_area *va,
  * ongoing.
  */
 static __always_inline struct vmap_area *
-__merge_or_add_vmap_area(struct vmap_area *va,
-	struct rb_root *root, struct list_head *head, bool augment)
+__merge_or_add_vmap_area(struct vmap_area *va, struct list_head *head, bool update_mt)
 {
 	struct vmap_area *sibling;
 	struct list_head *next;
-	struct rb_node **link;
-	struct rb_node *parent;
+	unsigned long old_start, old_end;
 	bool merged = false;
 
-	/*
-	 * Find a place in the tree where VA potentially will be
-	 * inserted, unless it is merged with its sibling/siblings.
-	 */
-	link = find_va_links(va, root, NULL, &parent);
-	if (!link)
-		return NULL;
+	if (update_mt && free_mt_supported())
+		next = find_vmap_area_insert_point_mt(va, &free_vmap_area_mt, head);
+	else
+		next = find_vmap_area_insert_point_list(va, head);
 
-	/*
-	 * Get next node of VA to check if merging can be done.
-	 */
-	next = get_va_next_sibling(parent, link);
-	if (unlikely(next == NULL))
-		goto insert;
+	if (!next)
+		return NULL;
 
 	/*
 	 * start            end
@@ -1838,7 +1723,11 @@ __merge_or_add_vmap_area(struct vmap_area *va,
 	if (next != head) {
 		sibling = list_entry(next, struct vmap_area, list);
 		if (sibling->va_start == va->va_end) {
+			old_start = sibling->va_start;
+			old_end = sibling->va_end;
 			sibling->va_start = va->va_start;
+			if (update_mt && free_mt_supported())
+				free_mt_update_va_locked(sibling, old_start, old_end);
 
 			/* Free vmap_area object. */
 			kmem_cache_free(vmap_area_cachep, va);
@@ -1862,14 +1751,20 @@ __merge_or_add_vmap_area(struct vmap_area *va,
 			/*
 			 * If both neighbors are coalesced, it is important
 			 * to unlink the "next" node first, followed by merging
-			 * with "previous" one. Otherwise the tree might not be
-			 * fully populated if a sibling's augmented value is
-			 * "normalized" because of rotation operations.
+			 * with "previous" one.
 			 */
-			if (merged)
-				__unlink_va(va, root, augment);
+			if (merged) {
+				if (update_mt)
+					unlink_vmap_area_free_locked(va);
+				else
+					list_del_init(&va->list);
+			}
 
+			old_start = sibling->va_start;
+			old_end = sibling->va_end;
 			sibling->va_end = va->va_end;
+			if (update_mt && free_mt_supported())
+				free_mt_update_va_locked(sibling, old_start, old_end);
 
 			/* Free vmap_area object. */
 			kmem_cache_free(vmap_area_cachep, va);
@@ -1880,31 +1775,97 @@ __merge_or_add_vmap_area(struct vmap_area *va,
 		}
 	}
 
-insert:
-	if (!merged)
-		__link_va(va, root, parent, link, head, augment);
+	if (!merged) {
+		if (update_mt)
+			insert_vmap_area_free_locked(va);
+		else
+			list_add_tail(&va->list, next);
+	}
 
 	return va;
 }
 
 static __always_inline struct vmap_area *
 merge_or_add_vmap_area(struct vmap_area *va,
-	struct rb_root *root, struct list_head *head)
+	struct list_head *head)
 {
-	return __merge_or_add_vmap_area(va, root, head, false);
+	return __merge_or_add_vmap_area(va, head, false);
 }
 
 static __always_inline struct vmap_area *
-merge_or_add_vmap_area_augment(struct vmap_area *va,
-	struct rb_root *root, struct list_head *head)
+merge_or_add_vmap_area_free_locked(struct vmap_area *va)
 {
-	va = __merge_or_add_vmap_area(va, root, head, true);
-	if (va)
-		augment_tree_propagate_from(va);
+	lockdep_assert_held(&free_vmap_area_lock);
+
+	va = __merge_or_add_vmap_area(va, &free_vmap_area_list, true);
+	if (va && va->va_start < free_vmap_alloc_hint)
+		free_vmap_alloc_hint = va->va_start;
 
 	return va;
 }
 
+/*
+ * Transitional wrappers retained until all legacy rb call sites are switched.
+ * Follow-up patches in this series remove these wrappers.
+ */
+static __always_inline void
+insert_vmap_area(struct vmap_area *va, struct rb_root *root,
+		 struct list_head *head)
+{
+	struct vmap_node *vn = addr_to_node(va->va_start);
+
+	if (head == &free_vmap_area_list) {
+		insert_vmap_area_free_locked(va);
+		return;
+	}
+
+	if (head == &vn->lazy.head) {
+		insert_vmap_area_lazy_locked(va, vn);
+		return;
+	}
+
+	insert_vmap_area_busy_locked(va, vn);
+}
+
+static __always_inline void
+insert_vmap_area_augment(struct vmap_area *va, struct rb_node *from,
+			 struct rb_root *root, struct list_head *head)
+{
+	insert_vmap_area(va, root, head);
+}
+
+static __always_inline void unlink_va(struct vmap_area *va, struct rb_root *root)
+{
+	struct vmap_node *vn = addr_to_node(va->va_start);
+
+	if (root == &free_vmap_area_root) {
+		unlink_vmap_area_free_locked(va);
+		return;
+	}
+
+	unlink_vmap_area_busy_locked(va, vn);
+}
+
+static __always_inline void
+unlink_va_augment(struct vmap_area *va, struct rb_root *root)
+{
+	unlink_va(va, root);
+}
+
+static __always_inline void augment_tree_propagate_from(struct vmap_area *va)
+{
+}
+
+static __always_inline struct vmap_area *
+merge_or_add_vmap_area_augment(struct vmap_area *va, struct rb_root *root,
+			       struct list_head *head)
+{
+	if (head == &free_vmap_area_list)
+		return merge_or_add_vmap_area_free_locked(va);
+
+	return merge_or_add_vmap_area(va, head);
+}
+
 static __always_inline bool
 is_within_this_va(struct vmap_area *va, unsigned long size,
 	unsigned long align, unsigned long vstart)
@@ -1924,74 +1885,103 @@ is_within_this_va(struct vmap_area *va, unsigned long size,
 	return (nva_start_addr + size <= va->va_end);
 }
 
-/*
- * Find the first free block(lowest start address) in the tree,
- * that will accomplish the request corresponding to passing
- * parameters. Please note, with an alignment bigger than PAGE_SIZE,
- * a search length is adjusted to account for worst case alignment
- * overhead.
- */
 static __always_inline struct vmap_area *
-find_vmap_lowest_match(struct rb_root *root, unsigned long size,
-	unsigned long align, unsigned long vstart, bool adjust_search_size)
+find_vmap_lowest_match_list(struct list_head *head, unsigned long size,
+			    unsigned long align, unsigned long vstart)
 {
 	struct vmap_area *va;
-	struct rb_node *node;
-	unsigned long length;
 
-	/* Start from the root. */
-	node = root->rb_node;
+	list_for_each_entry(va, head, list) {
+		if (!is_within_this_va(va, size, align, vstart))
+			continue;
 
-	/* Adjust the search size for alignment overhead. */
-	length = adjust_search_size ? size + align - 1 : size;
+		return va;
+	}
 
-	while (node) {
-		va = rb_entry(node, struct vmap_area, rb_node);
+	return NULL;
+}
 
-		if (get_subtree_max_size(node->rb_left) >= length &&
-				vstart < va->va_start) {
-			node = node->rb_left;
-		} else {
-			if (is_within_this_va(va, size, align, vstart))
-				return va;
+static __always_inline unsigned long
+clamp_vmap_alloc_hint(unsigned long hint, unsigned long vstart,
+		      unsigned long vend)
+{
+	if (hint < vstart || hint >= vend)
+		return vstart;
 
-			/*
-			 * Does not make sense to go deeper towards the right
-			 * sub-tree if it does not have a free block that is
-			 * equal or bigger to the requested search length.
-			 */
-			if (get_subtree_max_size(node->rb_right) >= length) {
-				node = node->rb_right;
-				continue;
-			}
+	return hint;
+}
 
-			/*
-			 * OK. We roll back and find the first right sub-tree,
-			 * that will satisfy the search criteria. It can happen
-			 * due to "vstart" restriction or an alignment overhead
-			 * that is bigger then PAGE_SIZE.
-			 */
-			while ((node = rb_parent(node))) {
-				va = rb_entry(node, struct vmap_area, rb_node);
-				if (is_within_this_va(va, size, align, vstart))
+/*
+ * Next-fit scan with wrap-around. Use maple to jump to the first candidate
+ * around the hint in O(log n), then continue on the ordered list for cheap
+ * neighbour traversal and deterministic coalescing behaviour.
+ */
+static __always_inline struct vmap_area *
+find_vmap_match_list_next_fit(struct list_head *head, unsigned long size,
+			      unsigned long align, unsigned long vstart,
+			      unsigned long vend)
+{
+	struct vmap_area *va, *start = NULL;
+	unsigned long hint;
+	bool wrapped;
+
+	hint = clamp_vmap_alloc_hint(free_vmap_alloc_hint, vstart, vend);
+
+	if (hint != vstart) {
+		if (free_mt_supported())
+			start = __find_vmap_area_exceed_addr_mt(hint,
+								&free_vmap_area_mt);
+
+		if (start) {
+			va = start;
+			list_for_each_entry_from(va, head, list) {
+				if (is_within_this_va(va, size, align, hint))
 					return va;
+			}
+		} else {
+			list_for_each_entry(va, head, list) {
+				if (va->va_end <= hint)
+					continue;
 
-				if (get_subtree_max_size(node->rb_right) >= length &&
-						vstart <= va->va_start) {
-					/*
-					 * Shift the vstart forward. Please note, we update it with
-					 * parent's start address adding "1" because we do not want
-					 * to enter same sub-tree after it has already been checked
-					 * and no suitable free block found there.
-					 */
-					vstart = va->va_start + 1;
-					node = node->rb_right;
-					break;
-				}
+				if (is_within_this_va(va, size, align, hint))
+					return va;
 			}
 		}
 	}
 
+	wrapped = (hint != vstart);
+	list_for_each_entry(va, head, list) {
+		if (wrapped) {
+			if (start && va == start)
+				break;
+			if (!start && va->va_start >= hint)
+				break;
+		}
+
+		if (is_within_this_va(va, size, align, vstart))
+			return va;
+	}
+
+	return NULL;
+}
+
+static __always_inline struct vmap_area *
+find_vmap_lowest_match_mt(struct maple_tree *tree, unsigned long size,
+			  unsigned long align, unsigned long vstart)
+{
+	MA_STATE(mas, tree, vstart, vstart);
+	struct vmap_area *va;
+
+	mas_set(&mas, vstart);
+	va = mas_find(&mas, ULONG_MAX);
+
+	while (va) {
+		if (is_within_this_va(va, size, align, vstart))
+			return va;
+
+		va = mas_next(&mas, ULONG_MAX);
+	}
+
 	return NULL;
 }
 
@@ -2015,8 +2005,8 @@ find_vmap_lowest_linear_match(struct list_head *head, unsigned long size,
 }
 
 static void
-find_vmap_lowest_match_check(struct rb_root *root, struct list_head *head,
-			     unsigned long size, unsigned long align)
+find_vmap_lowest_match_check(struct list_head *head, unsigned long size,
+			     unsigned long align)
 {
 	struct vmap_area *va_1, *va_2;
 	unsigned long vstart;
@@ -2025,7 +2015,10 @@ find_vmap_lowest_match_check(struct rb_root *root, struct list_head *head,
 	get_random_bytes(&rnd, sizeof(rnd));
 	vstart = VMALLOC_START + rnd;
 
-	va_1 = find_vmap_lowest_match(root, size, align, vstart, false);
+	if (free_mt_supported())
+		va_1 = find_vmap_lowest_match_mt(&free_vmap_area_mt, size, align, vstart);
+	else
+		va_1 = find_vmap_lowest_linear_match(head, size, align, vstart);
 	va_2 = find_vmap_lowest_linear_match(head, size, align, vstart);
 
 	if (va_1 != va_2)
@@ -2069,11 +2062,12 @@ classify_va_fit_type(struct vmap_area *va,
 }
 
 static __always_inline int
-va_clip(struct rb_root *root, struct list_head *head,
-		struct vmap_area *va, unsigned long nva_start_addr,
-		unsigned long size)
+va_clip(struct vmap_area *va, unsigned long nva_start_addr,
+	unsigned long size)
 {
 	struct vmap_area *lva = NULL;
+	unsigned long old_start = va->va_start;
+	unsigned long old_end = va->va_end;
 	enum fit_type type = classify_va_fit_type(va, nva_start_addr, size);
 
 	if (type == FL_FIT_TYPE) {
@@ -2084,7 +2078,7 @@ va_clip(struct rb_root *root, struct list_head *head,
 		 * V      NVA      V
 		 * |---------------|
 		 */
-		unlink_va_augment(va, root);
+		unlink_vmap_area_free_locked(va);
 		kmem_cache_free(vmap_area_cachep, va);
 	} else if (type == LE_FIT_TYPE) {
 		/*
@@ -2159,10 +2153,11 @@ va_clip(struct rb_root *root, struct list_head *head,
 	}
 
 	if (type != FL_FIT_TYPE) {
-		augment_tree_propagate_from(va);
+		if (free_mt_supported())
+			free_mt_update_va_locked(va, old_start, old_end);
 
 		if (lva)	/* type == NE_FIT_TYPE */
-			insert_vmap_area_augment(lva, &va->rb_node, root, head);
+			insert_vmap_area_free_locked(lva);
 	}
 
 	return 0;
@@ -2170,7 +2165,6 @@ va_clip(struct rb_root *root, struct list_head *head,
 
 static unsigned long
 va_alloc(struct vmap_area *va,
-		struct rb_root *root, struct list_head *head,
 		unsigned long size, unsigned long align,
 		unsigned long vstart, unsigned long vend)
 {
@@ -2187,7 +2181,7 @@ va_alloc(struct vmap_area *va,
 		return -ERANGE;
 
 	/* Update the free vmap_area. */
-	ret = va_clip(root, head, va, nva_start_addr, size);
+	ret = va_clip(va, nva_start_addr, size);
 	if (WARN_ON_ONCE(ret))
 		return ret;
 
@@ -2199,35 +2193,37 @@ va_alloc(struct vmap_area *va,
  * Otherwise an error value is returned that indicates failure.
  */
 static __always_inline unsigned long
-__alloc_vmap_area(struct rb_root *root, struct list_head *head,
-	unsigned long size, unsigned long align,
-	unsigned long vstart, unsigned long vend)
+__alloc_vmap_area(unsigned long size, unsigned long align,
+		  unsigned long vstart, unsigned long vend)
 {
-	bool adjust_search_size = true;
 	unsigned long nva_start_addr;
 	struct vmap_area *va;
 
+	lockdep_assert_held(&free_vmap_area_lock);
+
 	/*
-	 * Do not adjust when:
-	 *   a) align <= PAGE_SIZE, because it does not make any sense.
-	 *      All blocks(their start addresses) are at least PAGE_SIZE
-	 *      aligned anyway;
-	 *   b) a short range where a requested size corresponds to exactly
-	 *      specified [vstart:vend] interval and an alignment > PAGE_SIZE.
-	 *      With adjusted search length an allocation would not succeed.
+	 * Next-fit with wrap-around lowers repeated list-head scans in
+	 * high-churn workloads.
 	 */
-	if (align <= PAGE_SIZE || (align > PAGE_SIZE && (vend - vstart) == size))
-		adjust_search_size = false;
+	va = find_vmap_match_list_next_fit(&free_vmap_area_list, size, align,
+					   vstart, vend);
 
-	va = find_vmap_lowest_match(root, size, align, vstart, adjust_search_size);
 	if (unlikely(!va))
 		return -ENOENT;
 
-	nva_start_addr = va_alloc(va, root, head, size, align, vstart, vend);
+	nva_start_addr = va_alloc(va, size, align, vstart, vend);
+	if (!IS_ERR_VALUE(nva_start_addr)) {
+		unsigned long next_hint;
+
+		if (check_add_overflow(nva_start_addr, size, &next_hint))
+			free_vmap_alloc_hint = vstart;
+		else
+			free_vmap_alloc_hint = next_hint;
+	}
 
 #if DEBUG_AUGMENT_LOWEST_MATCH_CHECK
 	if (!IS_ERR_VALUE(nva_start_addr))
-		find_vmap_lowest_match_check(root, head, size, align);
+		find_vmap_lowest_match_check(&free_vmap_area_list, size, align);
 #endif
 
 	return nva_start_addr;
@@ -2441,8 +2437,7 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
 retry:
 	if (IS_ERR_VALUE(addr)) {
 		preload_this_cpu_lock(&free_vmap_area_lock, gfp_mask, node);
-		addr = __alloc_vmap_area(&free_vmap_area_root, &free_vmap_area_list,
-			size, align, vstart, vend);
+		addr = __alloc_vmap_area(size, align, vstart, vend);
 		spin_unlock(&free_vmap_area_lock);
 
 		/*
@@ -2589,7 +2584,6 @@ static void
 decay_va_pool_node(struct vmap_node *vn, bool full_decay)
 {
 	LIST_HEAD(decay_list);
-	struct rb_root decay_root = RB_ROOT;
 	struct vmap_area *va, *nva;
 	unsigned long n_decay, pool_len;
 	int i;
@@ -2618,7 +2612,7 @@ decay_va_pool_node(struct vmap_node *vn, bool full_decay)
 				break;
 
 			list_del_init(&va->list);
-			merge_or_add_vmap_area(va, &decay_root, &decay_list);
+			merge_or_add_vmap_area(va, &decay_list);
 		}
 
 		/*
@@ -5456,8 +5450,7 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
 			/* It is a BUG(), but trigger recovery instead. */
 			goto recovery;
 
-		ret = va_clip(&free_vmap_area_root,
-			&free_vmap_area_list, va, start, size);
+		ret = va_clip(va, start, size);
 		if (WARN_ON_ONCE(unlikely(ret)))
 			/* It is a BUG(), but trigger recovery instead. */
 			goto recovery;

-- 
2.34.1



  parent reply	other threads:[~2026-06-13 17:20 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-06-13 17:19 [PATCH RFC 00/12] mm/vmalloc: migrate vmap_area indexing from rb-tree to maple-tree Pranjal Arya
2026-06-13 17:19 ` [PATCH RFC 01/12] mm/vmalloc: introduce maple_tree-based indexing for vmap_area Pranjal Arya
2026-06-13 17:19 ` Pranjal Arya [this message]
2026-06-13 17:19 ` [PATCH RFC 03/12] mm/vmalloc: convert free, purge, and pcpu paths to maple_tree Pranjal Arya
2026-06-13 17:19 ` [PATCH RFC 04/12] mm/vmalloc: finalize maple-only indexing and shrink struct vmap_area Pranjal Arya
2026-06-13 17:19 ` [PATCH RFC 05/12] mm/vmalloc: tighten failure handling under memory pressure Pranjal Arya
2026-06-13 17:19 ` [PATCH RFC 06/12] mm/vmalloc: tighten alloc/free hot paths Pranjal Arya
2026-06-13 17:19 ` [PATCH RFC 07/12] mm/vmalloc: consolidate occupied tree as authoritative index on hot path Pranjal Arya
2026-06-13 17:19 ` [PATCH RFC 08/12] mm/vmalloc: track lazy-purge queue as a list_head Pranjal Arya
2026-06-13 17:19 ` [PATCH RFC 09/12] mm/vmalloc: collapse busy-tree find-then-unlink into a single mas_erase Pranjal Arya
2026-06-13 17:19 ` [PATCH RFC 10/12] mm/vmalloc: per-CPU caching of free ranges from the maple_tree allocator Pranjal Arya
2026-06-13 17:19 ` [PATCH RFC 11/12] mm/vmalloc: O(1) lookup of cached vmap_areas with bounded fast-reject Pranjal Arya
2026-06-13 17:19 ` [PATCH RFC 12/12] mm/vmalloc: harden bump-allocator alloc/free against UBSAN array bounds Pranjal Arya
2026-06-13 23:15 ` [PATCH RFC 00/12] mm/vmalloc: migrate vmap_area indexing from rb-tree to maple-tree Matthew Wilcox
2026-06-14  6:35 ` [syzbot ci] " syzbot ci
2026-06-14  6:58 ` [PATCH RFC 00/12] " Uladzislau Rezki

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260613-vmalloc_maple-v1-2-0aa740bb944b@oss.qualcomm.com \
    --to=pranjal.arya@oss.qualcomm.com \
    --cc=Suzuki.Poulose@arm.com \
    --cc=akpm@linux-foundation.org \
    --cc=aliceryhl@google.com \
    --cc=andrewjballance@gmail.com \
    --cc=balbirs@nvidia.com \
    --cc=dev.jain@arm.com \
    --cc=dvyukov@google.com \
    --cc=elver@google.com \
    --cc=glider@google.com \
    --cc=jackmanb@google.com \
    --cc=liam@infradead.org \
    --cc=linux-arm-msm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=ljs@kernel.org \
    --cc=maple-tree@lists.infradead.org \
    --cc=neil.armstrong@linaro.org \
    --cc=praan@google.com \
    --cc=puranjay@kernel.org \
    --cc=santosh.shukla@amd.com \
    --cc=shuah@kernel.org \
    --cc=smostafa@google.com \
    --cc=sudeep.holla@kernel.org \
    --cc=surenb@google.com \
    --cc=urezki@gmail.com \
    --cc=will@kernel.org \
    --cc=wkarny@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox