public inbox for intel-gfx@lists.freedesktop.org
 help / color / mirror / Atom feed
* [PATCH v7 1/3] drm/buddy: Optimize free block management with RB tree
@ 2025-09-23  9:02 Arunpravin Paneer Selvam
  2025-09-23  9:02 ` [PATCH v7 2/3] drm/buddy: Separate clear and dirty free block trees Arunpravin Paneer Selvam
                   ` (5 more replies)
  0 siblings, 6 replies; 11+ messages in thread
From: Arunpravin Paneer Selvam @ 2025-09-23  9:02 UTC (permalink / raw)
  To: christian.koenig, matthew.auld, dri-devel, amd-gfx, intel-gfx,
	intel-xe
  Cc: alexander.deucher, jani.nikula, peterz, samuel.pitoiset,
	Arunpravin Paneer Selvam, stable

Replace the freelist (O(n)) used for free block management with a
red-black tree, providing more efficient O(log n) search, insert,
and delete operations. This improves scalability and performance
when managing large numbers of free blocks per order (e.g., hundreds
or thousands).

In the VK-CTS memory stress subtest, the buddy manager merges
fragmented memory and inserts freed blocks into the freelist. Since
freelist insertion is O(n), this becomes a bottleneck as fragmentation
increases. Benchmarking shows list_insert_sorted() consumes ~52.69% CPU
with the freelist, compared to just 0.03% with the RB tree
(rbtree_insert.isra.0), despite performing the same sorted insert.

This also improves performance in heavily fragmented workloads,
such as games or graphics tests that stress memory.

As the buddy allocator evolves with new features such as clear-page
tracking, the resulting fragmentation and complexity have grown.
These RB-tree based design changes are introduced to address that
growth and ensure the allocator continues to perform efficiently
under fragmented conditions.

The RB-tree based free block management provides:
- O(n log n) aggregate complexity for all operations instead of O(n^2)
- Elimination of soft lockups and system instability
- Improved code maintainability and clarity
- Better scalability for large memory systems
- Predictable performance under fragmentation

v3(Matthew):
  - Remove RB_EMPTY_NODE check in force_merge function.
  - Rename rb for loop macros to have less generic names and move to
    .c file.
  - Make the rb node rb and link field as union.

v4(Jani Nikula):
  - The kernel-doc comment should be "/**"
  - Move all the rbtree macros to rbtree.h and add parens to ensure
    correct precedence.

v5:
  - Remove the inline in a .c file (Jani Nikula).

v6(Peter Zijlstra):
  - Add rb_add() function replacing the existing rbtree_insert() code.

v7:
  - A full-walk iteration over an rbtree is slower than over a list (Peter Zijlstra).
  - The existing rbtree_postorder_for_each_entry_safe macro should be used
    in scenarios where traversal order is not a critical factor (Christian).

Cc: stable@vger.kernel.org
Fixes: a68c7eaa7a8f ("drm/amdgpu: Enable clear page functionality")
Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
---
 drivers/gpu/drm/drm_buddy.c | 188 ++++++++++++++++++++++--------------
 include/drm/drm_buddy.h     |  11 ++-
 2 files changed, 124 insertions(+), 75 deletions(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index a94061f373de..67aa67229cc3 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -14,6 +14,8 @@
 
 static struct kmem_cache *slab_blocks;
 
+#define rbtree_get_free_block(node) rb_entry((node), struct drm_buddy_block, rb)
+
 static struct drm_buddy_block *drm_block_alloc(struct drm_buddy *mm,
 					       struct drm_buddy_block *parent,
 					       unsigned int order,
@@ -31,6 +33,8 @@ static struct drm_buddy_block *drm_block_alloc(struct drm_buddy *mm,
 	block->header |= order;
 	block->parent = parent;
 
+	RB_CLEAR_NODE(&block->rb);
+
 	BUG_ON(block->header & DRM_BUDDY_HEADER_UNUSED);
 	return block;
 }
@@ -41,23 +45,49 @@ static void drm_block_free(struct drm_buddy *mm,
 	kmem_cache_free(slab_blocks, block);
 }
 
-static void list_insert_sorted(struct drm_buddy *mm,
-			       struct drm_buddy_block *block)
+static bool drm_buddy_block_offset_less(const struct drm_buddy_block *block,
+					const struct drm_buddy_block *node)
 {
-	struct drm_buddy_block *node;
-	struct list_head *head;
+	return drm_buddy_block_offset(block) < drm_buddy_block_offset(node);
+}
 
-	head = &mm->free_list[drm_buddy_block_order(block)];
-	if (list_empty(head)) {
-		list_add(&block->link, head);
-		return;
-	}
+static bool rbtree_block_offset_less(struct rb_node *block,
+				     const struct rb_node *node)
+{
+	return drm_buddy_block_offset_less(rbtree_get_free_block(block),
+					   rbtree_get_free_block(node));
+}
 
-	list_for_each_entry(node, head, link)
-		if (drm_buddy_block_offset(block) < drm_buddy_block_offset(node))
-			break;
+static void rbtree_insert(struct drm_buddy *mm,
+			  struct drm_buddy_block *block)
+{
+	rb_add(&block->rb,
+	       &mm->free_tree[drm_buddy_block_order(block)],
+	       rbtree_block_offset_less);
+}
+
+static void rbtree_remove(struct drm_buddy *mm,
+			  struct drm_buddy_block *block)
+{
+	struct rb_root *root;
+
+	root = &mm->free_tree[drm_buddy_block_order(block)];
+	rb_erase(&block->rb, root);
 
-	__list_add(&block->link, node->link.prev, &node->link);
+	RB_CLEAR_NODE(&block->rb);
+}
+
+static struct drm_buddy_block *
+rbtree_last_entry(struct drm_buddy *mm, unsigned int order)
+{
+	struct rb_node *node = rb_last(&mm->free_tree[order]);
+
+	return node ? rb_entry(node, struct drm_buddy_block, rb) : NULL;
+}
+
+static bool rbtree_is_empty(struct drm_buddy *mm, unsigned int order)
+{
+	return RB_EMPTY_ROOT(&mm->free_tree[order]);
 }
 
 static void clear_reset(struct drm_buddy_block *block)
@@ -70,12 +100,13 @@ static void mark_cleared(struct drm_buddy_block *block)
 	block->header |= DRM_BUDDY_HEADER_CLEAR;
 }
 
-static void mark_allocated(struct drm_buddy_block *block)
+static void mark_allocated(struct drm_buddy *mm,
+			   struct drm_buddy_block *block)
 {
 	block->header &= ~DRM_BUDDY_HEADER_STATE;
 	block->header |= DRM_BUDDY_ALLOCATED;
 
-	list_del(&block->link);
+	rbtree_remove(mm, block);
 }
 
 static void mark_free(struct drm_buddy *mm,
@@ -84,15 +115,16 @@ static void mark_free(struct drm_buddy *mm,
 	block->header &= ~DRM_BUDDY_HEADER_STATE;
 	block->header |= DRM_BUDDY_FREE;
 
-	list_insert_sorted(mm, block);
+	rbtree_insert(mm, block);
 }
 
-static void mark_split(struct drm_buddy_block *block)
+static void mark_split(struct drm_buddy *mm,
+		       struct drm_buddy_block *block)
 {
 	block->header &= ~DRM_BUDDY_HEADER_STATE;
 	block->header |= DRM_BUDDY_SPLIT;
 
-	list_del(&block->link);
+	rbtree_remove(mm, block);
 }
 
 static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2)
@@ -148,7 +180,7 @@ static unsigned int __drm_buddy_free(struct drm_buddy *mm,
 				mark_cleared(parent);
 		}
 
-		list_del(&buddy->link);
+		rbtree_remove(mm, buddy);
 		if (force_merge && drm_buddy_block_is_clear(buddy))
 			mm->clear_avail -= drm_buddy_block_size(mm, buddy);
 
@@ -179,13 +211,19 @@ static int __force_merge(struct drm_buddy *mm,
 		return -EINVAL;
 
 	for (i = min_order - 1; i >= 0; i--) {
-		struct drm_buddy_block *block, *prev;
+		struct rb_root *root = &mm->free_tree[i];
+		struct rb_node *iter;
+
+		iter = rb_last(root);
 
-		list_for_each_entry_safe_reverse(block, prev, &mm->free_list[i], link) {
-			struct drm_buddy_block *buddy;
+		while (iter) {
+			struct drm_buddy_block *block, *buddy;
 			u64 block_start, block_end;
 
-			if (!block->parent)
+			block = rbtree_get_free_block(iter);
+			iter = rb_prev(iter);
+
+			if (!block || !block->parent)
 				continue;
 
 			block_start = drm_buddy_block_offset(block);
@@ -201,15 +239,10 @@ static int __force_merge(struct drm_buddy *mm,
 			WARN_ON(drm_buddy_block_is_clear(block) ==
 				drm_buddy_block_is_clear(buddy));
 
-			/*
-			 * If the prev block is same as buddy, don't access the
-			 * block in the next iteration as we would free the
-			 * buddy block as part of the free function.
-			 */
-			if (prev == buddy)
-				prev = list_prev_entry(prev, link);
+			if (iter == &buddy->rb)
+				iter = rb_prev(iter);
 
-			list_del(&block->link);
+			rbtree_remove(mm, block);
 			if (drm_buddy_block_is_clear(block))
 				mm->clear_avail -= drm_buddy_block_size(mm, block);
 
@@ -258,14 +291,14 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
 
 	BUG_ON(mm->max_order > DRM_BUDDY_MAX_ORDER);
 
-	mm->free_list = kmalloc_array(mm->max_order + 1,
-				      sizeof(struct list_head),
+	mm->free_tree = kmalloc_array(mm->max_order + 1,
+				      sizeof(struct rb_root),
 				      GFP_KERNEL);
-	if (!mm->free_list)
+	if (!mm->free_tree)
 		return -ENOMEM;
 
 	for (i = 0; i <= mm->max_order; ++i)
-		INIT_LIST_HEAD(&mm->free_list[i]);
+		mm->free_tree[i] = RB_ROOT;
 
 	mm->n_roots = hweight64(size);
 
@@ -273,7 +306,7 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
 				  sizeof(struct drm_buddy_block *),
 				  GFP_KERNEL);
 	if (!mm->roots)
-		goto out_free_list;
+		goto out_free_tree;
 
 	offset = 0;
 	i = 0;
@@ -312,8 +345,8 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
 	while (i--)
 		drm_block_free(mm, mm->roots[i]);
 	kfree(mm->roots);
-out_free_list:
-	kfree(mm->free_list);
+out_free_tree:
+	kfree(mm->free_tree);
 	return -ENOMEM;
 }
 EXPORT_SYMBOL(drm_buddy_init);
@@ -323,7 +356,7 @@ EXPORT_SYMBOL(drm_buddy_init);
  *
  * @mm: DRM buddy manager to free
  *
- * Cleanup memory manager resources and the freelist
+ * Cleanup memory manager resources and the freetree
  */
 void drm_buddy_fini(struct drm_buddy *mm)
 {
@@ -350,7 +383,7 @@ void drm_buddy_fini(struct drm_buddy *mm)
 	WARN_ON(mm->avail != mm->size);
 
 	kfree(mm->roots);
-	kfree(mm->free_list);
+	kfree(mm->free_tree);
 }
 EXPORT_SYMBOL(drm_buddy_fini);
 
@@ -383,7 +416,7 @@ static int split_block(struct drm_buddy *mm,
 		clear_reset(block);
 	}
 
-	mark_split(block);
+	mark_split(mm, block);
 
 	return 0;
 }
@@ -412,7 +445,7 @@ EXPORT_SYMBOL(drm_get_buddy);
  * @is_clear: blocks clear state
  *
  * Reset the clear state based on @is_clear value for each block
- * in the freelist.
+ * in the freetree.
  */
 void drm_buddy_reset_clear(struct drm_buddy *mm, bool is_clear)
 {
@@ -431,9 +464,9 @@ void drm_buddy_reset_clear(struct drm_buddy *mm, bool is_clear)
 	}
 
 	for (i = 0; i <= mm->max_order; ++i) {
-		struct drm_buddy_block *block;
+		struct drm_buddy_block *block, *tmp;
 
-		list_for_each_entry_reverse(block, &mm->free_list[i], link) {
+		rbtree_postorder_for_each_entry_safe(block, tmp, &mm->free_tree[i], rb) {
 			if (is_clear != drm_buddy_block_is_clear(block)) {
 				if (is_clear) {
 					mark_cleared(block);
@@ -639,14 +672,18 @@ get_maxblock(struct drm_buddy *mm, unsigned int order,
 	unsigned int i;
 
 	for (i = order; i <= mm->max_order; ++i) {
+		struct rb_node *iter = rb_last(&mm->free_tree[i]);
 		struct drm_buddy_block *tmp_block;
 
-		list_for_each_entry_reverse(tmp_block, &mm->free_list[i], link) {
-			if (block_incompatible(tmp_block, flags))
-				continue;
+		while (iter) {
+			tmp_block = rbtree_get_free_block(iter);
 
-			block = tmp_block;
-			break;
+			if (!block_incompatible(tmp_block, flags)) {
+				block = tmp_block;
+				break;
+			}
+
+			iter = rb_prev(iter);
 		}
 
 		if (!block)
@@ -667,7 +704,7 @@ get_maxblock(struct drm_buddy *mm, unsigned int order,
 }
 
 static struct drm_buddy_block *
-alloc_from_freelist(struct drm_buddy *mm,
+alloc_from_freetree(struct drm_buddy *mm,
 		    unsigned int order,
 		    unsigned long flags)
 {
@@ -682,14 +719,18 @@ alloc_from_freelist(struct drm_buddy *mm,
 			tmp = drm_buddy_block_order(block);
 	} else {
 		for (tmp = order; tmp <= mm->max_order; ++tmp) {
+			struct rb_node *iter = rb_last(&mm->free_tree[tmp]);
 			struct drm_buddy_block *tmp_block;
 
-			list_for_each_entry_reverse(tmp_block, &mm->free_list[tmp], link) {
-				if (block_incompatible(tmp_block, flags))
-					continue;
+			while (iter) {
+				tmp_block = rbtree_get_free_block(iter);
 
-				block = tmp_block;
-				break;
+				if (!block_incompatible(tmp_block, flags)) {
+					block = tmp_block;
+					break;
+				}
+
+				iter = rb_prev(iter);
 			}
 
 			if (block)
@@ -700,10 +741,8 @@ alloc_from_freelist(struct drm_buddy *mm,
 	if (!block) {
 		/* Fallback method */
 		for (tmp = order; tmp <= mm->max_order; ++tmp) {
-			if (!list_empty(&mm->free_list[tmp])) {
-				block = list_last_entry(&mm->free_list[tmp],
-							struct drm_buddy_block,
-							link);
+			if (!rbtree_is_empty(mm, tmp)) {
+				block = rbtree_last_entry(mm, tmp);
 				if (block)
 					break;
 			}
@@ -771,7 +810,7 @@ static int __alloc_range(struct drm_buddy *mm,
 
 		if (contains(start, end, block_start, block_end)) {
 			if (drm_buddy_block_is_free(block)) {
-				mark_allocated(block);
+				mark_allocated(mm, block);
 				total_allocated += drm_buddy_block_size(mm, block);
 				mm->avail -= drm_buddy_block_size(mm, block);
 				if (drm_buddy_block_is_clear(block))
@@ -849,8 +888,8 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm,
 {
 	u64 rhs_offset, lhs_offset, lhs_size, filled;
 	struct drm_buddy_block *block;
-	struct list_head *list;
 	LIST_HEAD(blocks_lhs);
+	struct rb_node *iter;
 	unsigned long pages;
 	unsigned int order;
 	u64 modify_size;
@@ -862,11 +901,14 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm,
 	if (order == 0)
 		return -ENOSPC;
 
-	list = &mm->free_list[order];
-	if (list_empty(list))
+	if (rbtree_is_empty(mm, order))
 		return -ENOSPC;
 
-	list_for_each_entry_reverse(block, list, link) {
+	iter = rb_last(&mm->free_tree[order]);
+
+	while (iter) {
+		block = rbtree_get_free_block(iter);
+
 		/* Allocate blocks traversing RHS */
 		rhs_offset = drm_buddy_block_offset(block);
 		err =  __drm_buddy_alloc_range(mm, rhs_offset, size,
@@ -891,6 +933,8 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm,
 		}
 		/* Free blocks for the next iteration */
 		drm_buddy_free_list_internal(mm, blocks);
+
+		iter = rb_prev(iter);
 	}
 
 	return -ENOSPC;
@@ -976,7 +1020,7 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
 	list_add(&block->tmp_link, &dfs);
 	err =  __alloc_range(mm, &dfs, new_start, new_size, blocks, NULL);
 	if (err) {
-		mark_allocated(block);
+		mark_allocated(mm, block);
 		mm->avail -= drm_buddy_block_size(mm, block);
 		if (drm_buddy_block_is_clear(block))
 			mm->clear_avail -= drm_buddy_block_size(mm, block);
@@ -999,8 +1043,8 @@ __drm_buddy_alloc_blocks(struct drm_buddy *mm,
 		return  __drm_buddy_alloc_range_bias(mm, start, end,
 						     order, flags);
 	else
-		/* Allocate from freelist */
-		return alloc_from_freelist(mm, order, flags);
+		/* Allocate from freetree */
+		return alloc_from_freetree(mm, order, flags);
 }
 
 /**
@@ -1017,8 +1061,8 @@ __drm_buddy_alloc_blocks(struct drm_buddy *mm,
  * alloc_range_bias() called on range limitations, which traverses
  * the tree and returns the desired block.
  *
- * alloc_from_freelist() called when *no* range restrictions
- * are enforced, which picks the block from the freelist.
+ * alloc_from_freetree() called when *no* range restrictions
+ * are enforced, which picks the block from the freetree.
  *
  * Returns:
  * 0 on success, error code on failure.
@@ -1120,7 +1164,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
 			}
 		} while (1);
 
-		mark_allocated(block);
+		mark_allocated(mm, block);
 		mm->avail -= drm_buddy_block_size(mm, block);
 		if (drm_buddy_block_is_clear(block))
 			mm->clear_avail -= drm_buddy_block_size(mm, block);
@@ -1201,10 +1245,10 @@ void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p)
 		   mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20, mm->clear_avail >> 20);
 
 	for (order = mm->max_order; order >= 0; order--) {
-		struct drm_buddy_block *block;
+		struct drm_buddy_block *block, *tmp;
 		u64 count = 0, free;
 
-		list_for_each_entry(block, &mm->free_list[order], link) {
+		rbtree_postorder_for_each_entry_safe(block, tmp, &mm->free_tree[order], rb) {
 			BUG_ON(!drm_buddy_block_is_free(block));
 			count++;
 		}
diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
index 513837632b7d..9ee105d4309f 100644
--- a/include/drm/drm_buddy.h
+++ b/include/drm/drm_buddy.h
@@ -10,6 +10,7 @@
 #include <linux/list.h>
 #include <linux/slab.h>
 #include <linux/sched.h>
+#include <linux/rbtree.h>
 
 #include <drm/drm_print.h>
 
@@ -53,7 +54,11 @@ struct drm_buddy_block {
 	 * a list, if so desired. As soon as the block is freed with
 	 * drm_buddy_free* ownership is given back to the mm.
 	 */
-	struct list_head link;
+	union {
+		struct rb_node rb;
+		struct list_head link;
+	};
+
 	struct list_head tmp_link;
 };
 
@@ -68,7 +73,7 @@ struct drm_buddy_block {
  */
 struct drm_buddy {
 	/* Maintain a free list for each order. */
-	struct list_head *free_list;
+	struct rb_root *free_tree;
 
 	/*
 	 * Maintain explicit binary tree(s) to track the allocation of the
@@ -94,7 +99,7 @@ struct drm_buddy {
 };
 
 static inline u64
-drm_buddy_block_offset(struct drm_buddy_block *block)
+drm_buddy_block_offset(const struct drm_buddy_block *block)
 {
 	return block->header & DRM_BUDDY_HEADER_OFFSET;
 }

base-commit: 3a9cf301794c1a49d95eeb13119ff490fb5cfe88
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v7 2/3] drm/buddy: Separate clear and dirty free block trees
  2025-09-23  9:02 [PATCH v7 1/3] drm/buddy: Optimize free block management with RB tree Arunpravin Paneer Selvam
@ 2025-09-23  9:02 ` Arunpravin Paneer Selvam
  2025-09-26 17:08   ` Matthew Auld
  2025-09-23  9:02 ` [PATCH v7 3/3] drm/buddy: Add KUnit tests for allocator performance under fragmentation Arunpravin Paneer Selvam
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 11+ messages in thread
From: Arunpravin Paneer Selvam @ 2025-09-23  9:02 UTC (permalink / raw)
  To: christian.koenig, matthew.auld, dri-devel, amd-gfx, intel-gfx,
	intel-xe
  Cc: alexander.deucher, jani.nikula, peterz, samuel.pitoiset,
	Arunpravin Paneer Selvam, stable

Maintain two separate RB trees per order - one for clear (zeroed) blocks
and another for dirty (uncleared) blocks. This separation improves
code clarity and makes it more obvious which tree is being searched
during allocation. It also improves scalability and efficiency when
searching for a specific type of block, avoiding unnecessary checks
and making the allocator more predictable under fragmentation.

The changes have been validated using the existing drm_buddy_test
KUnit test cases, along with selected graphics workloads,
to ensure correctness and avoid regressions.

v2: Missed adding the suggested-by tag. Added it in v2.

v3(Matthew):
  - Remove the double underscores from the internal functions.
  - Rename the internal functions to have less generic names.
  - Fix the error handling code.
  - Pass tree argument for the tree macro.
  - Use the existing dirty/free bit instead of new tree field.
  - Make free_trees[] instead of clear_tree and dirty_tree for
    more cleaner approach.

v4:
  - A bug was reported by Intel CI and it is fixed by
    Matthew Auld.
  - Replace the get_root function with
    &mm->free_trees[tree][order] (Matthew)
  - Remove the unnecessary rbtree_is_empty() check (Matthew)
  - Remove the unnecessary get_tree_for_flags() function.
  - Rename get_tree_for_block() name with get_block_tree() for more
    clarity.

v5(Jani Nikula):
  - Don't use static inline in .c files.
  - The enum free_tree and its enumerator names are quite generic for a
    header, and the whole enum should be an implementation detail.

v6:
  - Rewrite the __force_merge() function using rb_last() and rb_prev().

Cc: stable@vger.kernel.org
Fixes: a68c7eaa7a8f ("drm/amdgpu: Enable clear page functionality")
Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Suggested-by: Matthew Auld <matthew.auld@intel.com>
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4260
---
 drivers/gpu/drm/drm_buddy.c | 321 ++++++++++++++++++++----------------
 include/drm/drm_buddy.h     |   2 +-
 2 files changed, 182 insertions(+), 141 deletions(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 67aa67229cc3..6e12a0b5d5e3 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -12,9 +12,16 @@
 
 #include <drm/drm_buddy.h>
 
+enum drm_buddy_free_tree {
+	DRM_BUDDY_CLEAR_TREE = 0,
+	DRM_BUDDY_DIRTY_TREE,
+	DRM_BUDDY_MAX_FREE_TREES,
+};
+
 static struct kmem_cache *slab_blocks;
 
-#define rbtree_get_free_block(node) rb_entry((node), struct drm_buddy_block, rb)
+#define for_each_free_tree(tree) \
+	for ((tree) = 0; (tree) < DRM_BUDDY_MAX_FREE_TREES; (tree)++)
 
 static struct drm_buddy_block *drm_block_alloc(struct drm_buddy *mm,
 					       struct drm_buddy_block *parent,
@@ -45,6 +52,30 @@ static void drm_block_free(struct drm_buddy *mm,
 	kmem_cache_free(slab_blocks, block);
 }
 
+static enum drm_buddy_free_tree
+get_block_tree(struct drm_buddy_block *block)
+{
+	return drm_buddy_block_is_clear(block) ?
+	       DRM_BUDDY_CLEAR_TREE : DRM_BUDDY_DIRTY_TREE;
+}
+
+static struct drm_buddy_block *
+rbtree_get_free_block(const struct rb_node *node)
+{
+	return node ? rb_entry(node, struct drm_buddy_block, rb) : NULL;
+}
+
+static struct drm_buddy_block *
+rbtree_last_free_block(struct rb_root *root)
+{
+	return rbtree_get_free_block(rb_last(root));
+}
+
+static bool rbtree_is_empty(struct rb_root *root)
+{
+	return RB_EMPTY_ROOT(root);
+}
+
 static bool drm_buddy_block_offset_less(const struct drm_buddy_block *block,
 					const struct drm_buddy_block *node)
 {
@@ -59,37 +90,28 @@ static bool rbtree_block_offset_less(struct rb_node *block,
 }
 
 static void rbtree_insert(struct drm_buddy *mm,
-			  struct drm_buddy_block *block)
+			  struct drm_buddy_block *block,
+			  enum drm_buddy_free_tree tree)
 {
 	rb_add(&block->rb,
-	       &mm->free_tree[drm_buddy_block_order(block)],
+	       &mm->free_trees[tree][drm_buddy_block_order(block)],
 	       rbtree_block_offset_less);
 }
 
 static void rbtree_remove(struct drm_buddy *mm,
 			  struct drm_buddy_block *block)
 {
+	unsigned int order = drm_buddy_block_order(block);
 	struct rb_root *root;
+	enum drm_buddy_free_tree tree;
 
-	root = &mm->free_tree[drm_buddy_block_order(block)];
-	rb_erase(&block->rb, root);
+	tree = get_block_tree(block);
+	root = &mm->free_trees[tree][order];
 
+	rb_erase(&block->rb, root);
 	RB_CLEAR_NODE(&block->rb);
 }
 
-static struct drm_buddy_block *
-rbtree_last_entry(struct drm_buddy *mm, unsigned int order)
-{
-	struct rb_node *node = rb_last(&mm->free_tree[order]);
-
-	return node ? rb_entry(node, struct drm_buddy_block, rb) : NULL;
-}
-
-static bool rbtree_is_empty(struct drm_buddy *mm, unsigned int order)
-{
-	return RB_EMPTY_ROOT(&mm->free_tree[order]);
-}
-
 static void clear_reset(struct drm_buddy_block *block)
 {
 	block->header &= ~DRM_BUDDY_HEADER_CLEAR;
@@ -112,10 +134,13 @@ static void mark_allocated(struct drm_buddy *mm,
 static void mark_free(struct drm_buddy *mm,
 		      struct drm_buddy_block *block)
 {
+	enum drm_buddy_free_tree tree;
+
 	block->header &= ~DRM_BUDDY_HEADER_STATE;
 	block->header |= DRM_BUDDY_FREE;
 
-	rbtree_insert(mm, block);
+	tree = get_block_tree(block);
+	rbtree_insert(mm, block, tree);
 }
 
 static void mark_split(struct drm_buddy *mm,
@@ -201,6 +226,7 @@ static int __force_merge(struct drm_buddy *mm,
 			 u64 end,
 			 unsigned int min_order)
 {
+	enum drm_buddy_free_tree tree;
 	unsigned int order;
 	int i;
 
@@ -210,45 +236,48 @@ static int __force_merge(struct drm_buddy *mm,
 	if (min_order > mm->max_order)
 		return -EINVAL;
 
-	for (i = min_order - 1; i >= 0; i--) {
-		struct rb_root *root = &mm->free_tree[i];
-		struct rb_node *iter;
+	for_each_free_tree(tree) {
+		for (i = min_order - 1; i >= 0; i--) {
+			struct rb_node *iter = rb_last(&mm->free_trees[tree][i]);
 
-		iter = rb_last(root);
-
-		while (iter) {
-			struct drm_buddy_block *block, *buddy;
-			u64 block_start, block_end;
+			while (iter) {
+				struct drm_buddy_block *block, *buddy;
+				u64 block_start, block_end;
 
-			block = rbtree_get_free_block(iter);
-			iter = rb_prev(iter);
+				block = rbtree_get_free_block(iter);
+				iter = rb_prev(iter);
 
-			if (!block || !block->parent)
-				continue;
+				if (!block || !block->parent)
+					continue;
 
-			block_start = drm_buddy_block_offset(block);
-			block_end = block_start + drm_buddy_block_size(mm, block) - 1;
+				block_start = drm_buddy_block_offset(block);
+				block_end = block_start + drm_buddy_block_size(mm, block) - 1;
 
-			if (!contains(start, end, block_start, block_end))
-				continue;
+				if (!contains(start, end, block_start, block_end))
+					continue;
 
-			buddy = __get_buddy(block);
-			if (!drm_buddy_block_is_free(buddy))
-				continue;
+				buddy = __get_buddy(block);
+				if (!drm_buddy_block_is_free(buddy))
+					continue;
 
-			WARN_ON(drm_buddy_block_is_clear(block) ==
-				drm_buddy_block_is_clear(buddy));
+				WARN_ON(drm_buddy_block_is_clear(block) ==
+					drm_buddy_block_is_clear(buddy));
 
-			if (iter == &buddy->rb)
-				iter = rb_prev(iter);
+				/*
+				 * Advance to the next node when the current node is the buddy,
+				 * as freeing the block will also remove its buddy from the tree.
+				 */
+				if (iter == &buddy->rb)
+					iter = rb_prev(iter);
 
-			rbtree_remove(mm, block);
-			if (drm_buddy_block_is_clear(block))
-				mm->clear_avail -= drm_buddy_block_size(mm, block);
+				rbtree_remove(mm, block);
+				if (drm_buddy_block_is_clear(block))
+					mm->clear_avail -= drm_buddy_block_size(mm, block);
 
-			order = __drm_buddy_free(mm, block, true);
-			if (order >= min_order)
-				return 0;
+				order = __drm_buddy_free(mm, block, true);
+				if (order >= min_order)
+					return 0;
+			}
 		}
 	}
 
@@ -269,7 +298,7 @@ static int __force_merge(struct drm_buddy *mm,
  */
 int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
 {
-	unsigned int i;
+	unsigned int i, j;
 	u64 offset;
 
 	if (size < chunk_size)
@@ -291,14 +320,22 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
 
 	BUG_ON(mm->max_order > DRM_BUDDY_MAX_ORDER);
 
-	mm->free_tree = kmalloc_array(mm->max_order + 1,
-				      sizeof(struct rb_root),
-				      GFP_KERNEL);
-	if (!mm->free_tree)
+	mm->free_trees = kmalloc_array(DRM_BUDDY_MAX_FREE_TREES,
+				       sizeof(*mm->free_trees),
+				       GFP_KERNEL);
+	if (!mm->free_trees)
 		return -ENOMEM;
 
-	for (i = 0; i <= mm->max_order; ++i)
-		mm->free_tree[i] = RB_ROOT;
+	for (i = 0; i < DRM_BUDDY_MAX_FREE_TREES; i++) {
+		mm->free_trees[i] = kmalloc_array(mm->max_order + 1,
+						  sizeof(struct rb_root),
+						  GFP_KERNEL);
+		if (!mm->free_trees[i])
+			goto out_free_tree;
+
+		for (j = 0; j <= mm->max_order; ++j)
+			mm->free_trees[i][j] = RB_ROOT;
+	}
 
 	mm->n_roots = hweight64(size);
 
@@ -346,7 +383,9 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
 		drm_block_free(mm, mm->roots[i]);
 	kfree(mm->roots);
 out_free_tree:
-	kfree(mm->free_tree);
+	while (i--)
+		kfree(mm->free_trees[i]);
+	kfree(mm->free_trees);
 	return -ENOMEM;
 }
 EXPORT_SYMBOL(drm_buddy_init);
@@ -382,8 +421,9 @@ void drm_buddy_fini(struct drm_buddy *mm)
 
 	WARN_ON(mm->avail != mm->size);
 
+	for (i = 0; i < DRM_BUDDY_MAX_FREE_TREES; i++)
+		kfree(mm->free_trees[i]);
 	kfree(mm->roots);
-	kfree(mm->free_tree);
 }
 EXPORT_SYMBOL(drm_buddy_fini);
 
@@ -407,8 +447,7 @@ static int split_block(struct drm_buddy *mm,
 		return -ENOMEM;
 	}
 
-	mark_free(mm, block->left);
-	mark_free(mm, block->right);
+	mark_split(mm, block);
 
 	if (drm_buddy_block_is_clear(block)) {
 		mark_cleared(block->left);
@@ -416,7 +455,8 @@ static int split_block(struct drm_buddy *mm,
 		clear_reset(block);
 	}
 
-	mark_split(mm, block);
+	mark_free(mm, block->left);
+	mark_free(mm, block->right);
 
 	return 0;
 }
@@ -449,6 +489,7 @@ EXPORT_SYMBOL(drm_get_buddy);
  */
 void drm_buddy_reset_clear(struct drm_buddy *mm, bool is_clear)
 {
+	enum drm_buddy_free_tree src_tree, dst_tree;
 	u64 root_size, size, start;
 	unsigned int order;
 	int i;
@@ -463,19 +504,24 @@ void drm_buddy_reset_clear(struct drm_buddy *mm, bool is_clear)
 		size -= root_size;
 	}
 
+	src_tree = is_clear ? DRM_BUDDY_DIRTY_TREE : DRM_BUDDY_CLEAR_TREE;
+	dst_tree = is_clear ? DRM_BUDDY_CLEAR_TREE : DRM_BUDDY_DIRTY_TREE;
+
 	for (i = 0; i <= mm->max_order; ++i) {
+		struct rb_root *root = &mm->free_trees[src_tree][i];
 		struct drm_buddy_block *block, *tmp;
 
-		rbtree_postorder_for_each_entry_safe(block, tmp, &mm->free_tree[i], rb) {
-			if (is_clear != drm_buddy_block_is_clear(block)) {
-				if (is_clear) {
-					mark_cleared(block);
-					mm->clear_avail += drm_buddy_block_size(mm, block);
-				} else {
-					clear_reset(block);
-					mm->clear_avail -= drm_buddy_block_size(mm, block);
-				}
+		rbtree_postorder_for_each_entry_safe(block, tmp, root, rb) {
+			rbtree_remove(mm, block);
+			if (is_clear) {
+				mark_cleared(block);
+				mm->clear_avail += drm_buddy_block_size(mm, block);
+			} else {
+				clear_reset(block);
+				mm->clear_avail -= drm_buddy_block_size(mm, block);
 			}
+
+			rbtree_insert(mm, block, dst_tree);
 		}
 	}
 }
@@ -665,27 +711,17 @@ __drm_buddy_alloc_range_bias(struct drm_buddy *mm,
 }
 
 static struct drm_buddy_block *
-get_maxblock(struct drm_buddy *mm, unsigned int order,
-	     unsigned long flags)
+get_maxblock(struct drm_buddy *mm,
+	     unsigned int order,
+	     enum drm_buddy_free_tree tree)
 {
 	struct drm_buddy_block *max_block = NULL, *block = NULL;
+	struct rb_root *root;
 	unsigned int i;
 
 	for (i = order; i <= mm->max_order; ++i) {
-		struct rb_node *iter = rb_last(&mm->free_tree[i]);
-		struct drm_buddy_block *tmp_block;
-
-		while (iter) {
-			tmp_block = rbtree_get_free_block(iter);
-
-			if (!block_incompatible(tmp_block, flags)) {
-				block = tmp_block;
-				break;
-			}
-
-			iter = rb_prev(iter);
-		}
-
+		root = &mm->free_trees[tree][i];
+		block = rbtree_last_free_block(root);
 		if (!block)
 			continue;
 
@@ -709,43 +745,39 @@ alloc_from_freetree(struct drm_buddy *mm,
 		    unsigned long flags)
 {
 	struct drm_buddy_block *block = NULL;
+	struct rb_root *root;
+	enum drm_buddy_free_tree tree;
 	unsigned int tmp;
 	int err;
 
+	tree = (flags & DRM_BUDDY_CLEAR_ALLOCATION) ?
+		DRM_BUDDY_CLEAR_TREE : DRM_BUDDY_DIRTY_TREE;
+
 	if (flags & DRM_BUDDY_TOPDOWN_ALLOCATION) {
-		block = get_maxblock(mm, order, flags);
+		block = get_maxblock(mm, order, tree);
 		if (block)
 			/* Store the obtained block order */
 			tmp = drm_buddy_block_order(block);
 	} else {
 		for (tmp = order; tmp <= mm->max_order; ++tmp) {
-			struct rb_node *iter = rb_last(&mm->free_tree[tmp]);
-			struct drm_buddy_block *tmp_block;
-
-			while (iter) {
-				tmp_block = rbtree_get_free_block(iter);
-
-				if (!block_incompatible(tmp_block, flags)) {
-					block = tmp_block;
-					break;
-				}
-
-				iter = rb_prev(iter);
-			}
-
+			/* Get RB tree root for this order and tree */
+			root = &mm->free_trees[tree][tmp];
+			block = rbtree_last_free_block(root);
 			if (block)
 				break;
 		}
 	}
 
 	if (!block) {
-		/* Fallback method */
+		/* Try allocating from the other tree */
+		tree = (tree == DRM_BUDDY_CLEAR_TREE) ?
+			DRM_BUDDY_DIRTY_TREE : DRM_BUDDY_CLEAR_TREE;
+
 		for (tmp = order; tmp <= mm->max_order; ++tmp) {
-			if (!rbtree_is_empty(mm, tmp)) {
-				block = rbtree_last_entry(mm, tmp);
-				if (block)
-					break;
-			}
+			root = &mm->free_trees[tree][tmp];
+			block = rbtree_last_free_block(root);
+			if (block)
+				break;
 		}
 
 		if (!block)
@@ -887,9 +919,9 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm,
 				     struct list_head *blocks)
 {
 	u64 rhs_offset, lhs_offset, lhs_size, filled;
+	enum drm_buddy_free_tree tree;
 	struct drm_buddy_block *block;
 	LIST_HEAD(blocks_lhs);
-	struct rb_node *iter;
 	unsigned long pages;
 	unsigned int order;
 	u64 modify_size;
@@ -901,40 +933,43 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm,
 	if (order == 0)
 		return -ENOSPC;
 
-	if (rbtree_is_empty(mm, order))
+	if (rbtree_is_empty(&mm->free_trees[DRM_BUDDY_CLEAR_TREE][order]) &&
+	    rbtree_is_empty(&mm->free_trees[DRM_BUDDY_DIRTY_TREE][order]))
 		return -ENOSPC;
 
-	iter = rb_last(&mm->free_tree[order]);
-
-	while (iter) {
-		block = rbtree_get_free_block(iter);
-
-		/* Allocate blocks traversing RHS */
-		rhs_offset = drm_buddy_block_offset(block);
-		err =  __drm_buddy_alloc_range(mm, rhs_offset, size,
-					       &filled, blocks);
-		if (!err || err != -ENOSPC)
-			return err;
-
-		lhs_size = max((size - filled), min_block_size);
-		if (!IS_ALIGNED(lhs_size, min_block_size))
-			lhs_size = round_up(lhs_size, min_block_size);
-
-		/* Allocate blocks traversing LHS */
-		lhs_offset = drm_buddy_block_offset(block) - lhs_size;
-		err =  __drm_buddy_alloc_range(mm, lhs_offset, lhs_size,
-					       NULL, &blocks_lhs);
-		if (!err) {
-			list_splice(&blocks_lhs, blocks);
-			return 0;
-		} else if (err != -ENOSPC) {
+	for_each_free_tree(tree) {
+		struct rb_node *iter = rb_last(&mm->free_trees[tree][order]);
+
+		while (iter) {
+			block = rbtree_get_free_block(iter);
+
+			/* Allocate blocks traversing RHS */
+			rhs_offset = drm_buddy_block_offset(block);
+			err =  __drm_buddy_alloc_range(mm, rhs_offset, size,
+						       &filled, blocks);
+			if (!err || err != -ENOSPC)
+				return err;
+
+			lhs_size = max((size - filled), min_block_size);
+			if (!IS_ALIGNED(lhs_size, min_block_size))
+				lhs_size = round_up(lhs_size, min_block_size);
+
+			/* Allocate blocks traversing LHS */
+			lhs_offset = drm_buddy_block_offset(block) - lhs_size;
+			err =  __drm_buddy_alloc_range(mm, lhs_offset, lhs_size,
+						       NULL, &blocks_lhs);
+			if (!err) {
+				list_splice(&blocks_lhs, blocks);
+				return 0;
+			} else if (err != -ENOSPC) {
+				drm_buddy_free_list_internal(mm, blocks);
+				return err;
+			}
+			/* Free blocks for the next iteration */
 			drm_buddy_free_list_internal(mm, blocks);
-			return err;
-		}
-		/* Free blocks for the next iteration */
-		drm_buddy_free_list_internal(mm, blocks);
 
-		iter = rb_prev(iter);
+			iter = rb_prev(iter);
+		}
 	}
 
 	return -ENOSPC;
@@ -1239,6 +1274,7 @@ EXPORT_SYMBOL(drm_buddy_block_print);
  */
 void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p)
 {
+	enum drm_buddy_free_tree tree;
 	int order;
 
 	drm_printf(p, "chunk_size: %lluKiB, total: %lluMiB, free: %lluMiB, clear_free: %lluMiB\n",
@@ -1246,11 +1282,16 @@ void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p)
 
 	for (order = mm->max_order; order >= 0; order--) {
 		struct drm_buddy_block *block, *tmp;
+		struct rb_root *root;
 		u64 count = 0, free;
 
-		rbtree_postorder_for_each_entry_safe(block, tmp, &mm->free_tree[order], rb) {
-			BUG_ON(!drm_buddy_block_is_free(block));
-			count++;
+		for_each_free_tree(tree) {
+			root = &mm->free_trees[tree][order];
+
+			rbtree_postorder_for_each_entry_safe(block, tmp, root, rb) {
+				BUG_ON(!drm_buddy_block_is_free(block));
+				count++;
+			}
 		}
 
 		drm_printf(p, "order-%2d ", order);
diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
index 9ee105d4309f..d7891d08f67a 100644
--- a/include/drm/drm_buddy.h
+++ b/include/drm/drm_buddy.h
@@ -73,7 +73,7 @@ struct drm_buddy_block {
  */
 struct drm_buddy {
 	/* Maintain a free list for each order. */
-	struct rb_root *free_tree;
+	struct rb_root **free_trees;
 
 	/*
 	 * Maintain explicit binary tree(s) to track the allocation of the
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v7 3/3] drm/buddy: Add KUnit tests for allocator performance under fragmentation
  2025-09-23  9:02 [PATCH v7 1/3] drm/buddy: Optimize free block management with RB tree Arunpravin Paneer Selvam
  2025-09-23  9:02 ` [PATCH v7 2/3] drm/buddy: Separate clear and dirty free block trees Arunpravin Paneer Selvam
@ 2025-09-23  9:02 ` Arunpravin Paneer Selvam
  2025-09-26 11:00   ` Matthew Auld
  2025-09-23  9:52 ` ✓ i915.CI.BAT: success for series starting with [v7,1/3] drm/buddy: Optimize free block management with RB tree Patchwork
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 11+ messages in thread
From: Arunpravin Paneer Selvam @ 2025-09-23  9:02 UTC (permalink / raw)
  To: christian.koenig, matthew.auld, dri-devel, amd-gfx, intel-gfx,
	intel-xe
  Cc: alexander.deucher, jani.nikula, peterz, samuel.pitoiset,
	Arunpravin Paneer Selvam

Add KUnit test cases that create severe memory fragmentation and
measure allocation/free performance.

The tests simulate two scenarios -

1. Allocation under severe fragmentation
   - Allocate the entire 4 GiB space as 8 KiB blocks with 64 KiB alignment,
     split them into two groups and free with mixed flags to block coalescing.
   - Repeatedly allocate and free 64 KiB blocks while timing the loop.
   - Freelist runtime: 76475 ms (76.5 seconds), soft-lockup triggered.
     RB-tree runtime: 186 ms.

2. Reverse free order under fragmentation
   - Create a similarly fragmented space, free half the blocks, reverse
     the order of the remainder, and release them with the cleared flag.
   - Freelist runtime: 85620 ms (85.6 seconds).
     RB-tree runtime: 114 ms.

Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
---
 drivers/gpu/drm/tests/drm_buddy_test.c | 110 +++++++++++++++++++++++++
 1 file changed, 110 insertions(+)

diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c b/drivers/gpu/drm/tests/drm_buddy_test.c
index 7a0e523651f0..19b49fb6ec19 100644
--- a/drivers/gpu/drm/tests/drm_buddy_test.c
+++ b/drivers/gpu/drm/tests/drm_buddy_test.c
@@ -21,6 +21,115 @@ static inline u64 get_size(int order, u64 chunk_size)
 	return (1 << order) * chunk_size;
 }
 
+static void drm_test_buddy_fragmentation_performance(struct kunit *test)
+{
+	const unsigned long max_acceptable_time_ms = 1000;
+	struct drm_buddy_block *block, *tmp;
+	int num_blocks, i, ret, count = 0;
+	LIST_HEAD(allocated_blocks);
+	unsigned long elapsed_ms;
+	LIST_HEAD(reverse_list);
+	LIST_HEAD(test_blocks);
+	LIST_HEAD(clear_list);
+	LIST_HEAD(dirty_list);
+	LIST_HEAD(free_list);
+	struct drm_buddy mm;
+	u64 mm_size = SZ_4G;
+	ktime_t start, end;
+
+	/*
+	 * Allocation under severe fragmentation
+	 *
+	 * Create severe fragmentation by allocating the entire 4 GiB address space
+	 * as tiny 8 KiB blocks but forcing a 64 KiB alignment. The resulting pattern
+	 * leaves many scattered holes. Split the allocations into two groups and
+	 * return them with different flags to block coalescing, then repeatedly
+	 * allocate and free 64 KiB blocks while timing the loop. This stresses how
+	 * quickly the allocator can satisfy larger, aligned requests from a pool of
+	 * highly fragmented space.
+	 */
+	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, SZ_4K),
+			       "buddy_init failed\n");
+
+	num_blocks = mm_size / SZ_64K;
+
+	start = ktime_get();
+	/* Allocate with maximum fragmentation - 8K blocks with 64K alignment */
+	for (i = 0; i < num_blocks; i++)
+		KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, SZ_8K, SZ_64K,
+								    &allocated_blocks, 0),
+					"buddy_alloc hit an error size=%u\n", SZ_8K);
+
+	list_for_each_entry_safe(block, tmp, &allocated_blocks, link) {
+		if (count % 4 == 0 || count % 4 == 3)
+			list_move_tail(&block->link, &clear_list);
+		else
+			list_move_tail(&block->link, &dirty_list);
+		count++;
+	}
+
+	/* Free with different flags to ensure no coalescing */
+	drm_buddy_free_list(&mm, &clear_list, DRM_BUDDY_CLEARED);
+	drm_buddy_free_list(&mm, &dirty_list, 0);
+
+	for (i = 0; i < num_blocks; i++)
+		KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, SZ_64K, SZ_64K,
+								    &test_blocks, 0),
+					"buddy_alloc hit an error size=%u\n", SZ_64K);
+	drm_buddy_free_list(&mm, &test_blocks, 0);
+
+	end = ktime_get();
+	elapsed_ms = ktime_to_ms(ktime_sub(end, start));
+	/* Performance validation */
+	KUNIT_EXPECT_LT_MSG(test, elapsed_ms, max_acceptable_time_ms,
+			    "Fragmented allocation took %lu ms (max acceptable: %lu ms)",
+			    elapsed_ms, max_acceptable_time_ms);
+	drm_buddy_fini(&mm);
+
+	/*
+	 * Reverse free order under fragmentation
+	 *
+	 * Construct a fragmented 4 GiB space by allocating every 8 KiB block with
+	 * 64 KiB alignment, creating a dense scatter of small regions. Half of the
+	 * blocks are selectively freed to form sparse gaps, while the remaining
+	 * allocations are preserved, reordered in reverse, and released back with
+	 * the cleared flag. This models a pathological reverse-ordered free pattern
+	 * and measures how quickly the allocator can merge and reclaim space when
+	 * deallocation occurs in the opposite order of allocation, exposing the
+	 * cost difference between a linear freelist scan and an ordered tree lookup.
+	 */
+	ret = drm_buddy_init(&mm, mm_size, SZ_4K);
+	KUNIT_ASSERT_EQ(test, ret, 0);
+
+	start = ktime_get();
+	/* Allocate maximum fragmentation */
+	for (i = 0; i < num_blocks; i++)
+		KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, SZ_8K, SZ_64K,
+								    &allocated_blocks, 0),
+					"buddy_alloc hit an error size=%u\n", SZ_8K);
+
+	list_for_each_entry_safe(block, tmp, &allocated_blocks, link) {
+		if (count % 2 == 0)
+			list_move_tail(&block->link, &free_list);
+		count++;
+	}
+	drm_buddy_free_list(&mm, &free_list, DRM_BUDDY_CLEARED);
+
+	list_for_each_entry_safe_reverse(block, tmp, &allocated_blocks, link)
+		list_move(&block->link, &reverse_list);
+	drm_buddy_free_list(&mm, &reverse_list, DRM_BUDDY_CLEARED);
+
+	end = ktime_get();
+	elapsed_ms = ktime_to_ms(ktime_sub(end, start));
+
+	/* Performance validation */
+	KUNIT_EXPECT_LT_MSG(test, elapsed_ms, max_acceptable_time_ms,
+			    "Reverse-ordered free took %lu ms (max acceptable: %lu ms)",
+			    elapsed_ms, max_acceptable_time_ms);
+
+	drm_buddy_fini(&mm);
+}
+
 static void drm_test_buddy_alloc_range_bias(struct kunit *test)
 {
 	u32 mm_size, size, ps, bias_size, bias_start, bias_end, bias_rem;
@@ -772,6 +881,7 @@ static struct kunit_case drm_buddy_tests[] = {
 	KUNIT_CASE(drm_test_buddy_alloc_contiguous),
 	KUNIT_CASE(drm_test_buddy_alloc_clear),
 	KUNIT_CASE(drm_test_buddy_alloc_range_bias),
+	KUNIT_CASE(drm_test_buddy_fragmentation_performance),
 	{}
 };
 
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* ✓ i915.CI.BAT: success for series starting with [v7,1/3] drm/buddy: Optimize free block management with RB tree
  2025-09-23  9:02 [PATCH v7 1/3] drm/buddy: Optimize free block management with RB tree Arunpravin Paneer Selvam
  2025-09-23  9:02 ` [PATCH v7 2/3] drm/buddy: Separate clear and dirty free block trees Arunpravin Paneer Selvam
  2025-09-23  9:02 ` [PATCH v7 3/3] drm/buddy: Add KUnit tests for allocator performance under fragmentation Arunpravin Paneer Selvam
@ 2025-09-23  9:52 ` Patchwork
  2025-09-23 10:58 ` ✗ i915.CI.Full: failure " Patchwork
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 11+ messages in thread
From: Patchwork @ 2025-09-23  9:52 UTC (permalink / raw)
  To: Arunpravin Paneer Selvam; +Cc: intel-gfx

[-- Attachment #1: Type: text/plain, Size: 1874 bytes --]

== Series Details ==

Series: series starting with [v7,1/3] drm/buddy: Optimize free block management with RB tree
URL   : https://patchwork.freedesktop.org/series/154888/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_17254 -> Patchwork_154888v1
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/index.html

Participating hosts (39 -> 38)
------------------------------

  Missing    (1): fi-kbl-guc 

Known issues
------------

  Here are the changes found in Patchwork_154888v1 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@kms_force_connector_basic@force-edid:
    - bat-kbl-2:          NOTRUN -> [ABORT][1] ([i915#15020])
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/bat-kbl-2/igt@kms_force_connector_basic@force-edid.html

  
#### Possible fixes ####

  * igt@kms_force_connector_basic@force-connector-state:
    - bat-kbl-2:          [ABORT][2] ([i915#15020]) -> [PASS][3]
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_17254/bat-kbl-2/igt@kms_force_connector_basic@force-connector-state.html
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/bat-kbl-2/igt@kms_force_connector_basic@force-connector-state.html

  
  [i915#15020]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/15020


Build changes
-------------

  * Linux: CI_DRM_17254 -> Patchwork_154888v1

  CI-20190529: 20190529
  CI_DRM_17254: 3ff214d6c6a86025aa3feadcb5bba4abfc2dd8f1 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_8547: 8547
  Patchwork_154888v1: 3ff214d6c6a86025aa3feadcb5bba4abfc2dd8f1 @ git://anongit.freedesktop.org/gfx-ci/linux

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/index.html

[-- Attachment #2: Type: text/html, Size: 2561 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* ✗ i915.CI.Full: failure for series starting with [v7,1/3] drm/buddy: Optimize free block management with RB tree
  2025-09-23  9:02 [PATCH v7 1/3] drm/buddy: Optimize free block management with RB tree Arunpravin Paneer Selvam
                   ` (2 preceding siblings ...)
  2025-09-23  9:52 ` ✓ i915.CI.BAT: success for series starting with [v7,1/3] drm/buddy: Optimize free block management with RB tree Patchwork
@ 2025-09-23 10:58 ` Patchwork
  2025-09-26  5:41 ` [PATCH v7 1/3] " Arunpravin Paneer Selvam
  2025-09-26 16:05 ` Matthew Auld
  5 siblings, 0 replies; 11+ messages in thread
From: Patchwork @ 2025-09-23 10:58 UTC (permalink / raw)
  To: Arunpravin Paneer Selvam; +Cc: intel-gfx

[-- Attachment #1: Type: text/plain, Size: 32233 bytes --]

== Series Details ==

Series: series starting with [v7,1/3] drm/buddy: Optimize free block management with RB tree
URL   : https://patchwork.freedesktop.org/series/154888/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_17254_full -> Patchwork_154888v1_full
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_154888v1_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_154888v1_full, please notify your bug team (I915-ci-infra@lists.freedesktop.org) to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

Participating hosts (12 -> 12)
------------------------------

  No changes in participating hosts

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_154888v1_full:

### IGT changes ###

#### Possible regressions ####

  * igt@gem_lmem_swapping@heavy-multi:
    - shard-dg2:          NOTRUN -> [SKIP][1] +2 other tests skip
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-dg2-11/igt@gem_lmem_swapping@heavy-multi.html

  * igt@i915_suspend@basic-s3-without-i915:
    - shard-dg2:          NOTRUN -> [WARN][2]
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-dg2-11/igt@i915_suspend@basic-s3-without-i915.html

  
Known issues
------------

  Here are the changes found in Patchwork_154888v1_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@fbdev@eof:
    - shard-rkl:          NOTRUN -> [SKIP][3] ([i915#14544] / [i915#2582])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-rkl-6/igt@fbdev@eof.html

  * igt@fbdev@nullptr:
    - shard-dg2:          NOTRUN -> [SKIP][4] ([i915#2582])
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-dg2-11/igt@fbdev@nullptr.html

  * igt@gem_ccs@ctrl-surf-copy:
    - shard-rkl:          NOTRUN -> [SKIP][5] ([i915#14544] / [i915#3555] / [i915#9323])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-rkl-6/igt@gem_ccs@ctrl-surf-copy.html

  * igt@gem_create@create-ext-cpu-access-big:
    - shard-rkl:          NOTRUN -> [SKIP][6] ([i915#14544] / [i915#6335])
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-rkl-6/igt@gem_create@create-ext-cpu-access-big.html

  * igt@gem_create@create-ext-set-pat:
    - shard-rkl:          NOTRUN -> [SKIP][7] ([i915#14544] / [i915#8562])
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-rkl-6/igt@gem_create@create-ext-set-pat.html

  * igt@gem_ctx_param@get-priority-new-ctx:
    - shard-rkl:          [PASS][8] -> [DMESG-WARN][9] ([i915#12964])
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_17254/shard-rkl-6/igt@gem_ctx_param@get-priority-new-ctx.html
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-rkl-6/igt@gem_ctx_param@get-priority-new-ctx.html

  * igt@gem_ctx_sseu@invalid-sseu:
    - shard-rkl:          NOTRUN -> [SKIP][10] ([i915#14544] / [i915#280])
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-rkl-6/igt@gem_ctx_sseu@invalid-sseu.html

  * igt@gem_exec_balancer@parallel-bb-first:
    - shard-rkl:          NOTRUN -> [SKIP][11] ([i915#14544] / [i915#4525]) +2 other tests skip
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-rkl-6/igt@gem_exec_balancer@parallel-bb-first.html

  * igt@gem_exec_reloc@basic-cpu-read:
    - shard-rkl:          NOTRUN -> [SKIP][12] ([i915#14544] / [i915#3281]) +7 other tests skip
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-rkl-6/igt@gem_exec_reloc@basic-cpu-read.html

  * igt@gem_exec_schedule@semaphore-power:
    - shard-rkl:          NOTRUN -> [SKIP][13] ([i915#14544] / [i915#7276])
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-rkl-6/igt@gem_exec_schedule@semaphore-power.html

  * igt@gem_lmem_swapping@parallel-random-verify:
    - shard-rkl:          NOTRUN -> [SKIP][14] ([i915#14544] / [i915#4613]) +2 other tests skip
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-rkl-6/igt@gem_lmem_swapping@parallel-random-verify.html

  * igt@gem_mmap_gtt@basic-small-bo:
    - shard-dg2:          NOTRUN -> [SKIP][15] ([i915#2575]) +77 other tests skip
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-dg2-11/igt@gem_mmap_gtt@basic-small-bo.html

  * igt@gem_partial_pwrite_pread@writes-after-reads-uncached:
    - shard-rkl:          NOTRUN -> [SKIP][16] ([i915#14544] / [i915#3282]) +9 other tests skip
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-rkl-6/igt@gem_partial_pwrite_pread@writes-after-reads-uncached.html

  * igt@gem_pxp@create-valid-protected-context:
    - shard-rkl:          NOTRUN -> [TIMEOUT][17] ([i915#12964])
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-rkl-6/igt@gem_pxp@create-valid-protected-context.html

  * igt@gem_pxp@verify-pxp-key-change-after-suspend-resume:
    - shard-rkl:          NOTRUN -> [SKIP][18] ([i915#14544] / [i915#4270])
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-rkl-6/igt@gem_pxp@verify-pxp-key-change-after-suspend-resume.html

  * igt@gem_pxp@verify-pxp-stale-ctx-execution:
    - shard-rkl:          NOTRUN -> [TIMEOUT][19] ([i915#12917] / [i915#12964])
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-rkl-6/igt@gem_pxp@verify-pxp-stale-ctx-execution.html

  * igt@gem_render_copy@y-tiled-mc-ccs-to-yf-tiled-ccs:
    - shard-dg2:          NOTRUN -> [SKIP][20] ([i915#2575] / [i915#5190]) +2 other tests skip
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-dg2-11/igt@gem_render_copy@y-tiled-mc-ccs-to-yf-tiled-ccs.html

  * igt@gem_set_tiling_vs_blt@tiled-to-untiled:
    - shard-rkl:          NOTRUN -> [SKIP][21] ([i915#14544] / [i915#8411])
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-rkl-6/igt@gem_set_tiling_vs_blt@tiled-to-untiled.html

  * igt@gem_userptr_blits@forbidden-operations:
    - shard-rkl:          NOTRUN -> [SKIP][22] ([i915#14544] / [i915#3282] / [i915#3297])
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-rkl-6/igt@gem_userptr_blits@forbidden-operations.html

  * igt@gem_userptr_blits@stress-purge:
    - shard-rkl:          NOTRUN -> [DMESG-WARN][23] ([i915#12964])
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-rkl-6/igt@gem_userptr_blits@stress-purge.html

  * igt@gem_userptr_blits@unsync-unmap-after-close:
    - shard-rkl:          NOTRUN -> [SKIP][24] ([i915#14544] / [i915#3297]) +1 other test skip
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-rkl-6/igt@gem_userptr_blits@unsync-unmap-after-close.html

  * igt@gen7_exec_parse@chained-batch:
    - shard-rkl:          NOTRUN -> [SKIP][25] ([i915#14544]) +78 other tests skip
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-rkl-6/igt@gen7_exec_parse@chained-batch.html

  * igt@gen9_exec_parse@bb-chained:
    - shard-rkl:          NOTRUN -> [SKIP][26] ([i915#14544] / [i915#2527]) +3 other tests skip
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-rkl-6/igt@gen9_exec_parse@bb-chained.html

  * igt@i915_drm_fdinfo@busy:
    - shard-dg2:          NOTRUN -> [SKIP][27] ([i915#14959]) +8 other tests skip
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-dg2-11/igt@i915_drm_fdinfo@busy.html

  * igt@i915_pm_freq_mult@media-freq@gt0:
    - shard-rkl:          NOTRUN -> [SKIP][28] ([i915#14544] / [i915#6590]) +1 other test skip
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-rkl-6/igt@i915_pm_freq_mult@media-freq@gt0.html

  * igt@i915_pm_rpm@gem-execbuf-stress-pc8:
    - shard-dg2:          NOTRUN -> [SKIP][29] ([i915#14962])
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-dg2-11/igt@i915_pm_rpm@gem-execbuf-stress-pc8.html

  * igt@kms_big_fb@yf-tiled-64bpp-rotate-0:
    - shard-dg2:          NOTRUN -> [SKIP][30] ([i915#5190]) +7 other tests skip
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-dg2-11/igt@kms_big_fb@yf-tiled-64bpp-rotate-0.html

  * igt@kms_cdclk@mode-transition:
    - shard-rkl:          NOTRUN -> [SKIP][31] ([i915#14544] / [i915#3742])
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-rkl-6/igt@kms_cdclk@mode-transition.html

  * igt@kms_chamelium_hpd@dp-hpd:
    - shard-rkl:          NOTRUN -> [SKIP][32] ([i915#11151] / [i915#14544] / [i915#7828]) +5 other tests skip
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-rkl-6/igt@kms_chamelium_hpd@dp-hpd.html

  * igt@kms_color@legacy-gamma:
    - shard-rkl:          NOTRUN -> [SKIP][33] ([i915#12655] / [i915#14544]) +2 other tests skip
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-rkl-6/igt@kms_color@legacy-gamma.html

  * igt@kms_dsc@dsc-fractional-bpp:
    - shard-dg2:          NOTRUN -> [SKIP][34] +74 other tests skip
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-dg2-11/igt@kms_dsc@dsc-fractional-bpp.html

  * igt@kms_fbcon_fbt@fbc:
    - shard-rkl:          NOTRUN -> [SKIP][35] ([i915#14544] / [i915#14561])
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-rkl-6/igt@kms_fbcon_fbt@fbc.html

  * igt@kms_fbcon_fbt@psr:
    - shard-rkl:          NOTRUN -> [SKIP][36] ([i915#14544] / [i915#3955])
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-rkl-6/igt@kms_fbcon_fbt@psr.html

  * igt@kms_feature_discovery@chamelium:
    - shard-rkl:          NOTRUN -> [SKIP][37] ([i915#14544] / [i915#4854])
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-rkl-6/igt@kms_feature_discovery@chamelium.html

  * igt@kms_feature_discovery@display-2x:
    - shard-rkl:          NOTRUN -> [SKIP][38] ([i915#14544] / [i915#1839])
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-rkl-6/igt@kms_feature_discovery@display-2x.html

  * igt@kms_flip@2x-flip-vs-absolute-wf_vblank-interruptible:
    - shard-rkl:          NOTRUN -> [SKIP][39] ([i915#14544] / [i915#9934]) +2 other tests skip
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-rkl-6/igt@kms_flip@2x-flip-vs-absolute-wf_vblank-interruptible.html

  * igt@kms_flip@flip-vs-fences-interruptible:
    - shard-rkl:          NOTRUN -> [SKIP][40] ([i915#14544] / [i915#3637]) +2 other tests skip
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-rkl-6/igt@kms_flip@flip-vs-fences-interruptible.html

  * igt@kms_flip_scaled_crc@flip-64bpp-linear-to-32bpp-linear-upscaling:
    - shard-rkl:          NOTRUN -> [SKIP][41] ([i915#14544] / [i915#3555]) +5 other tests skip
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-rkl-6/igt@kms_flip_scaled_crc@flip-64bpp-linear-to-32bpp-linear-upscaling.html

  * igt@kms_frontbuffer_tracking@fbcpsr-2p-scndscrn-shrfb-pgflip-blt:
    - shard-rkl:          NOTRUN -> [SKIP][42] ([i915#14544] / [i915#1849] / [i915#5354]) +53 other tests skip
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-rkl-6/igt@kms_frontbuffer_tracking@fbcpsr-2p-scndscrn-shrfb-pgflip-blt.html

  * igt@kms_invalid_mode@overflow-vrefresh:
    - shard-rkl:          NOTRUN -> [SKIP][43] ([i915#14544] / [i915#8826])
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-rkl-6/igt@kms_invalid_mode@overflow-vrefresh.html

  * igt@kms_joiner@basic-big-joiner:
    - shard-rkl:          NOTRUN -> [SKIP][44] ([i915#10656] / [i915#14544]) +1 other test skip
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-rkl-6/igt@kms_joiner@basic-big-joiner.html

  * igt@kms_joiner@invalid-modeset-force-big-joiner:
    - shard-rkl:          NOTRUN -> [SKIP][45] ([i915#12388] / [i915#14544])
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-rkl-6/igt@kms_joiner@invalid-modeset-force-big-joiner.html

  * igt@kms_pipe_crc_basic@read-crc:
    - shard-rkl:          NOTRUN -> [SKIP][46] ([i915#11190] / [i915#14544])
   [46]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-rkl-6/igt@kms_pipe_crc_basic@read-crc.html

  * igt@kms_plane@plane-panning-top-left:
    - shard-rkl:          NOTRUN -> [SKIP][47] ([i915#14544] / [i915#8825]) +1 other test skip
   [47]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-rkl-6/igt@kms_plane@plane-panning-top-left.html

  * igt@kms_plane_alpha_blend@constant-alpha-mid:
    - shard-rkl:          NOTRUN -> [SKIP][48] ([i915#14544] / [i915#7294])
   [48]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-rkl-6/igt@kms_plane_alpha_blend@constant-alpha-mid.html

  * igt@kms_plane_scaling@invalid-num-scalers:
    - shard-rkl:          NOTRUN -> [SKIP][49] ([i915#14544] / [i915#3555] / [i915#6953] / [i915#8152])
   [49]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-rkl-6/igt@kms_plane_scaling@invalid-num-scalers.html

  * igt@kms_plane_scaling@plane-upscale-20x20-with-pixel-format:
    - shard-dg2:          NOTRUN -> [SKIP][50] ([i915#14958] / [i915#9423]) +2 other tests skip
   [50]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-dg2-11/igt@kms_plane_scaling@plane-upscale-20x20-with-pixel-format.html

  * igt@kms_plane_scaling@planes-downscale-factor-0-75-upscale-factor-0-25:
    - shard-rkl:          NOTRUN -> [SKIP][51] ([i915#14544] / [i915#6953] / [i915#8152]) +1 other test skip
   [51]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-rkl-6/igt@kms_plane_scaling@planes-downscale-factor-0-75-upscale-factor-0-25.html

  * igt@kms_plane_scaling@planes-scaler-unity-scaling:
    - shard-rkl:          NOTRUN -> [SKIP][52] ([i915#14544] / [i915#3555] / [i915#8152])
   [52]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-rkl-6/igt@kms_plane_scaling@planes-scaler-unity-scaling.html

  * igt@kms_plane_scaling@planes-scaler-unity-scaling@pipe-a:
    - shard-rkl:          NOTRUN -> [SKIP][53] ([i915#12247] / [i915#14544]) +2 other tests skip
   [53]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-rkl-6/igt@kms_plane_scaling@planes-scaler-unity-scaling@pipe-a.html

  * igt@kms_plane_scaling@planes-unity-scaling-downscale-factor-0-5@pipe-b:
    - shard-rkl:          NOTRUN -> [SKIP][54] ([i915#12247] / [i915#14544] / [i915#8152]) +2 other tests skip
   [54]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-rkl-6/igt@kms_plane_scaling@planes-unity-scaling-downscale-factor-0-5@pipe-b.html

  * igt@kms_pm_dc@dc5-dpms-negative:
    - shard-rkl:          NOTRUN -> [SKIP][55] ([i915#13441] / [i915#14544])
   [55]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-rkl-6/igt@kms_pm_dc@dc5-dpms-negative.html

  * igt@kms_pm_rpm@fences-dpms:
    - shard-rkl:          NOTRUN -> [SKIP][56] ([i915#14544] / [i915#1849])
   [56]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-rkl-6/igt@kms_pm_rpm@fences-dpms.html

  * igt@kms_pm_rpm@modeset-lpsp-stress-no-wait:
    - shard-dg2:          NOTRUN -> [SKIP][57] ([i915#14960])
   [57]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-dg2-11/igt@kms_pm_rpm@modeset-lpsp-stress-no-wait.html

  * igt@kms_pm_rpm@modeset-non-lpsp-stress-no-wait:
    - shard-rkl:          NOTRUN -> [SKIP][58] ([i915#14544] / [i915#9519])
   [58]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-rkl-6/igt@kms_pm_rpm@modeset-non-lpsp-stress-no-wait.html

  * igt@kms_prime@d3hot:
    - shard-dg2:          NOTRUN -> [SKIP][59] ([i915#15026])
   [59]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-dg2-11/igt@kms_prime@d3hot.html

  * igt@kms_psr2_sf@fbc-pr-overlay-primary-update-sf-dmg-area:
    - shard-rkl:          NOTRUN -> [SKIP][60] ([i915#11520] / [i915#14544]) +5 other tests skip
   [60]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-rkl-6/igt@kms_psr2_sf@fbc-pr-overlay-primary-update-sf-dmg-area.html

  * igt@kms_psr@fbc-psr2-sprite-render:
    - shard-rkl:          NOTRUN -> [SKIP][61] ([i915#1072] / [i915#14544] / [i915#9732]) +15 other tests skip
   [61]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-rkl-6/igt@kms_psr@fbc-psr2-sprite-render.html

  * igt@kms_psr_stress_test@invalidate-primary-flip-overlay:
    - shard-rkl:          NOTRUN -> [SKIP][62] ([i915#14544] / [i915#9685])
   [62]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-rkl-6/igt@kms_psr_stress_test@invalidate-primary-flip-overlay.html

  * igt@kms_rotation_crc@primary-yf-tiled-reflect-x-180:
    - shard-dg2:          NOTRUN -> [SKIP][63] ([i915#14958] / [i915#5190])
   [63]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-dg2-11/igt@kms_rotation_crc@primary-yf-tiled-reflect-x-180.html

  * igt@kms_vblank@ts-continuation-modeset:
    - shard-dg2:          NOTRUN -> [SKIP][64] ([i915#14958]) +72 other tests skip
   [64]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-dg2-11/igt@kms_vblank@ts-continuation-modeset.html

  * igt@prime_vgem@basic-write:
    - shard-rkl:          NOTRUN -> [SKIP][65] ([i915#14544] / [i915#3291] / [i915#3708])
   [65]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-rkl-6/igt@prime_vgem@basic-write.html

  * igt@prime_vgem@coherency-gtt:
    - shard-rkl:          NOTRUN -> [SKIP][66] ([i915#14544] / [i915#3708])
   [66]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-rkl-6/igt@prime_vgem@coherency-gtt.html

  
#### Possible fixes ####

  * igt@gem_exec_schedule@smoketest-all:
    - shard-rkl:          [DMESG-WARN][67] ([i915#12964]) -> [PASS][68]
   [67]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_17254/shard-rkl-6/igt@gem_exec_schedule@smoketest-all.html
   [68]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-rkl-6/igt@gem_exec_schedule@smoketest-all.html

  
#### Warnings ####

  * igt@i915_module_load@load:
    - shard-snb:          ([ABORT][69], [ABORT][70], [ABORT][71], [ABORT][72], [ABORT][73], [ABORT][74], [ABORT][75], [ABORT][76], [ABORT][77], [ABORT][78], [ABORT][79], [ABORT][80], [ABORT][81], [ABORT][82], [ABORT][83], [ABORT][84], [ABORT][85], [ABORT][86], [ABORT][87]) ([i915#15022]) -> ([ABORT][88], [ABORT][89], [ABORT][90], [ABORT][91], [ABORT][92], [ABORT][93], [ABORT][94], [ABORT][95], [ABORT][96], [ABORT][97], [ABORT][98], [ABORT][99], [ABORT][100], [ABORT][101], [ABORT][102], [ABORT][103], [ABORT][104], [ABORT][105], [ABORT][106], [ABORT][107], [ABORT][108], [ABORT][109], [ABORT][110], [ABORT][111], [ABORT][112]) ([i915#15020] / [i915#15022])
   [69]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_17254/shard-snb5/igt@i915_module_load@load.html
   [70]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_17254/shard-snb5/igt@i915_module_load@load.html
   [71]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_17254/shard-snb5/igt@i915_module_load@load.html
   [72]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_17254/shard-snb5/igt@i915_module_load@load.html
   [73]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_17254/shard-snb6/igt@i915_module_load@load.html
   [74]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_17254/shard-snb4/igt@i915_module_load@load.html
   [75]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_17254/shard-snb4/igt@i915_module_load@load.html
   [76]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_17254/shard-snb6/igt@i915_module_load@load.html
   [77]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_17254/shard-snb4/igt@i915_module_load@load.html
   [78]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_17254/shard-snb6/igt@i915_module_load@load.html
   [79]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_17254/shard-snb4/igt@i915_module_load@load.html
   [80]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_17254/shard-snb7/igt@i915_module_load@load.html
   [81]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_17254/shard-snb1/igt@i915_module_load@load.html
   [82]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_17254/shard-snb7/igt@i915_module_load@load.html
   [83]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_17254/shard-snb7/igt@i915_module_load@load.html
   [84]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_17254/shard-snb1/igt@i915_module_load@load.html
   [85]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_17254/shard-snb1/igt@i915_module_load@load.html
   [86]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_17254/shard-snb1/igt@i915_module_load@load.html
   [87]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_17254/shard-snb7/igt@i915_module_load@load.html
   [88]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-snb7/igt@i915_module_load@load.html
   [89]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-snb5/igt@i915_module_load@load.html
   [90]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-snb1/igt@i915_module_load@load.html
   [91]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-snb6/igt@i915_module_load@load.html
   [92]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-snb7/igt@i915_module_load@load.html
   [93]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-snb7/igt@i915_module_load@load.html
   [94]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-snb4/igt@i915_module_load@load.html
   [95]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-snb4/igt@i915_module_load@load.html
   [96]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-snb1/igt@i915_module_load@load.html
   [97]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-snb6/igt@i915_module_load@load.html
   [98]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-snb4/igt@i915_module_load@load.html
   [99]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-snb4/igt@i915_module_load@load.html
   [100]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-snb5/igt@i915_module_load@load.html
   [101]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-snb5/igt@i915_module_load@load.html
   [102]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-snb5/igt@i915_module_load@load.html
   [103]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-snb4/igt@i915_module_load@load.html
   [104]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-snb6/igt@i915_module_load@load.html
   [105]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-snb7/igt@i915_module_load@load.html
   [106]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-snb7/igt@i915_module_load@load.html
   [107]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-snb1/igt@i915_module_load@load.html
   [108]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-snb6/igt@i915_module_load@load.html
   [109]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-snb1/igt@i915_module_load@load.html
   [110]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-snb5/igt@i915_module_load@load.html
   [111]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-snb6/igt@i915_module_load@load.html
   [112]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-snb1/igt@i915_module_load@load.html
    - shard-glk:          ([ABORT][113], [ABORT][114], [ABORT][115], [PASS][116], [ABORT][117], [ABORT][118], [ABORT][119], [ABORT][120], [ABORT][121], [ABORT][122], [ABORT][123], [ABORT][124]) ([i915#15020]) -> ([ABORT][125], [ABORT][126], [ABORT][127], [ABORT][128], [ABORT][129], [ABORT][130], [ABORT][131]) ([i915#15020])
   [113]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_17254/shard-glk5/igt@i915_module_load@load.html
   [114]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_17254/shard-glk6/igt@i915_module_load@load.html
   [115]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_17254/shard-glk5/igt@i915_module_load@load.html
   [116]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_17254/shard-glk6/igt@i915_module_load@load.html
   [117]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_17254/shard-glk6/igt@i915_module_load@load.html
   [118]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_17254/shard-glk5/igt@i915_module_load@load.html
   [119]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_17254/shard-glk1/igt@i915_module_load@load.html
   [120]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_17254/shard-glk8/igt@i915_module_load@load.html
   [121]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_17254/shard-glk1/igt@i915_module_load@load.html
   [122]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_17254/shard-glk8/igt@i915_module_load@load.html
   [123]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_17254/shard-glk1/igt@i915_module_load@load.html
   [124]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_17254/shard-glk1/igt@i915_module_load@load.html
   [125]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-glk1/igt@i915_module_load@load.html
   [126]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-glk5/igt@i915_module_load@load.html
   [127]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-glk6/igt@i915_module_load@load.html
   [128]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-glk6/igt@i915_module_load@load.html
   [129]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-glk5/igt@i915_module_load@load.html
   [130]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-glk1/igt@i915_module_load@load.html
   [131]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-glk1/igt@i915_module_load@load.html

  * igt@kms_plane_scaling@planes-downscale-factor-0-5-upscale-20x20:
    - shard-dg2:          [SKIP][132] ([i915#9423]) -> [SKIP][133] ([i915#14958] / [i915#9423]) +2 other tests skip
   [132]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_17254/shard-dg2-11/igt@kms_plane_scaling@planes-downscale-factor-0-5-upscale-20x20.html
   [133]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-dg2-11/igt@kms_plane_scaling@planes-downscale-factor-0-5-upscale-20x20.html

  * igt@kms_rotation_crc@primary-y-tiled-reflect-x-270:
    - shard-dg2:          [SKIP][134] ([i915#5190]) -> [SKIP][135] ([i915#14958] / [i915#5190])
   [134]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_17254/shard-dg2-11/igt@kms_rotation_crc@primary-y-tiled-reflect-x-270.html
   [135]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/shard-dg2-11/igt@kms_rotation_crc@primary-y-tiled-reflect-x-270.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [i915#10656]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/10656
  [i915#1072]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/1072
  [i915#11151]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/11151
  [i915#11190]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/11190
  [i915#11520]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/11520
  [i915#12247]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/12247
  [i915#12388]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/12388
  [i915#12655]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/12655
  [i915#12917]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/12917
  [i915#12964]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/12964
  [i915#13441]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/13441
  [i915#14544]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/14544
  [i915#14561]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/14561
  [i915#14958]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/14958
  [i915#14959]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/14959
  [i915#14960]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/14960
  [i915#14962]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/14962
  [i915#15020]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/15020
  [i915#15022]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/15022
  [i915#15026]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/15026
  [i915#1839]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/1839
  [i915#1849]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/1849
  [i915#2527]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/2527
  [i915#2575]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/2575
  [i915#2582]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/2582
  [i915#280]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/280
  [i915#3281]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/3281
  [i915#3282]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/3282
  [i915#3291]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/3291
  [i915#3297]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/3297
  [i915#3555]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/3555
  [i915#3637]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/3637
  [i915#3708]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/3708
  [i915#3742]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/3742
  [i915#3955]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/3955
  [i915#4270]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/4270
  [i915#4525]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/4525
  [i915#4613]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/4613
  [i915#4854]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/4854
  [i915#5190]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/5190
  [i915#5354]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/5354
  [i915#6335]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/6335
  [i915#6590]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/6590
  [i915#6953]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/6953
  [i915#7276]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/7276
  [i915#7294]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/7294
  [i915#7828]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/7828
  [i915#8152]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/8152
  [i915#8411]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/8411
  [i915#8562]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/8562
  [i915#8825]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/8825
  [i915#8826]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/8826
  [i915#9323]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/9323
  [i915#9423]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/9423
  [i915#9519]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/9519
  [i915#9685]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/9685
  [i915#9732]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/9732
  [i915#9934]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/9934


Build changes
-------------

  * Linux: CI_DRM_17254 -> Patchwork_154888v1

  CI-20190529: 20190529
  CI_DRM_17254: 3ff214d6c6a86025aa3feadcb5bba4abfc2dd8f1 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_8547: 8547
  Patchwork_154888v1: 3ff214d6c6a86025aa3feadcb5bba4abfc2dd8f1 @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_154888v1/index.html

[-- Attachment #2: Type: text/html, Size: 40681 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v7 1/3] drm/buddy: Optimize free block management with RB tree
  2025-09-23  9:02 [PATCH v7 1/3] drm/buddy: Optimize free block management with RB tree Arunpravin Paneer Selvam
                   ` (3 preceding siblings ...)
  2025-09-23 10:58 ` ✗ i915.CI.Full: failure " Patchwork
@ 2025-09-26  5:41 ` Arunpravin Paneer Selvam
  2025-09-26 16:05 ` Matthew Auld
  5 siblings, 0 replies; 11+ messages in thread
From: Arunpravin Paneer Selvam @ 2025-09-26  5:41 UTC (permalink / raw)
  To: christian.koenig, matthew.auld, dri-devel, amd-gfx, intel-gfx,
	intel-xe
  Cc: alexander.deucher, jani.nikula, peterz, samuel.pitoiset, stable

Hi Matthew,

Ping?

Regards,
Arun.

On 9/23/2025 2:32 PM, Arunpravin Paneer Selvam wrote:
> Replace the freelist (O(n)) used for free block management with a
> red-black tree, providing more efficient O(log n) search, insert,
> and delete operations. This improves scalability and performance
> when managing large numbers of free blocks per order (e.g., hundreds
> or thousands).
>
> In the VK-CTS memory stress subtest, the buddy manager merges
> fragmented memory and inserts freed blocks into the freelist. Since
> freelist insertion is O(n), this becomes a bottleneck as fragmentation
> increases. Benchmarking shows list_insert_sorted() consumes ~52.69% CPU
> with the freelist, compared to just 0.03% with the RB tree
> (rbtree_insert.isra.0), despite performing the same sorted insert.
>
> This also improves performance in heavily fragmented workloads,
> such as games or graphics tests that stress memory.
>
> As the buddy allocator evolves with new features such as clear-page
> tracking, the resulting fragmentation and complexity have grown.
> These RB-tree based design changes are introduced to address that
> growth and ensure the allocator continues to perform efficiently
> under fragmented conditions.
>
> The RB tree implementation with separate clear/dirty trees provides:
> - O(n log n) aggregate complexity for all operations instead of O(n^2)
> - Elimination of soft lockups and system instability
> - Improved code maintainability and clarity
> - Better scalability for large memory systems
> - Predictable performance under fragmentation
>
> v3(Matthew):
>    - Remove RB_EMPTY_NODE check in force_merge function.
>    - Rename rb for loop macros to have less generic names and move to
>      .c file.
>    - Make the rb node rb and link field as union.
>
> v4(Jani Nikula):
>    - The kernel-doc comment should be "/**"
>    - Move all the rbtree macros to rbtree.h and add parens to ensure
>      correct precedence.
>
> v5:
>    - Remove the inline in a .c file (Jani Nikula).
>
> v6(Peter Zijlstra):
>    - Add rb_add() function replacing the existing rbtree_insert() code.
>
> v7:
>    - A full walk iteration in rbtree is slower than the list (Peter Zijlstra).
>    - The existing rbtree_postorder_for_each_entry_safe macro should be used
>      in scenarios where traversal order is not a critical factor (Christian).
>
> Cc: stable@vger.kernel.org
> Fixes: a68c7eaa7a8f ("drm/amdgpu: Enable clear page functionality")
> Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
> ---
>   drivers/gpu/drm/drm_buddy.c | 188 ++++++++++++++++++++++--------------
>   include/drm/drm_buddy.h     |  11 ++-
>   2 files changed, 124 insertions(+), 75 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
> index a94061f373de..67aa67229cc3 100644
> --- a/drivers/gpu/drm/drm_buddy.c
> +++ b/drivers/gpu/drm/drm_buddy.c
> @@ -14,6 +14,8 @@
>   
>   static struct kmem_cache *slab_blocks;
>   
> +#define rbtree_get_free_block(node) rb_entry((node), struct drm_buddy_block, rb)
> +
>   static struct drm_buddy_block *drm_block_alloc(struct drm_buddy *mm,
>   					       struct drm_buddy_block *parent,
>   					       unsigned int order,
> @@ -31,6 +33,8 @@ static struct drm_buddy_block *drm_block_alloc(struct drm_buddy *mm,
>   	block->header |= order;
>   	block->parent = parent;
>   
> +	RB_CLEAR_NODE(&block->rb);
> +
>   	BUG_ON(block->header & DRM_BUDDY_HEADER_UNUSED);
>   	return block;
>   }
> @@ -41,23 +45,49 @@ static void drm_block_free(struct drm_buddy *mm,
>   	kmem_cache_free(slab_blocks, block);
>   }
>   
> -static void list_insert_sorted(struct drm_buddy *mm,
> -			       struct drm_buddy_block *block)
> +static bool drm_buddy_block_offset_less(const struct drm_buddy_block *block,
> +					const struct drm_buddy_block *node)
>   {
> -	struct drm_buddy_block *node;
> -	struct list_head *head;
> +	return drm_buddy_block_offset(block) < drm_buddy_block_offset(node);
> +}
>   
> -	head = &mm->free_list[drm_buddy_block_order(block)];
> -	if (list_empty(head)) {
> -		list_add(&block->link, head);
> -		return;
> -	}
> +static bool rbtree_block_offset_less(struct rb_node *block,
> +				     const struct rb_node *node)
> +{
> +	return drm_buddy_block_offset_less(rbtree_get_free_block(block),
> +					   rbtree_get_free_block(node));
> +}
>   
> -	list_for_each_entry(node, head, link)
> -		if (drm_buddy_block_offset(block) < drm_buddy_block_offset(node))
> -			break;
> +static void rbtree_insert(struct drm_buddy *mm,
> +			  struct drm_buddy_block *block)
> +{
> +	rb_add(&block->rb,
> +	       &mm->free_tree[drm_buddy_block_order(block)],
> +	       rbtree_block_offset_less);
> +}
> +
> +static void rbtree_remove(struct drm_buddy *mm,
> +			  struct drm_buddy_block *block)
> +{
> +	struct rb_root *root;
> +
> +	root = &mm->free_tree[drm_buddy_block_order(block)];
> +	rb_erase(&block->rb, root);
>   
> -	__list_add(&block->link, node->link.prev, &node->link);
> +	RB_CLEAR_NODE(&block->rb);
> +}
> +
> +static struct drm_buddy_block *
> +rbtree_last_entry(struct drm_buddy *mm, unsigned int order)
> +{
> +	struct rb_node *node = rb_last(&mm->free_tree[order]);
> +
> +	return node ? rb_entry(node, struct drm_buddy_block, rb) : NULL;
> +}
> +
> +static bool rbtree_is_empty(struct drm_buddy *mm, unsigned int order)
> +{
> +	return RB_EMPTY_ROOT(&mm->free_tree[order]);
>   }
>   
>   static void clear_reset(struct drm_buddy_block *block)
> @@ -70,12 +100,13 @@ static void mark_cleared(struct drm_buddy_block *block)
>   	block->header |= DRM_BUDDY_HEADER_CLEAR;
>   }
>   
> -static void mark_allocated(struct drm_buddy_block *block)
> +static void mark_allocated(struct drm_buddy *mm,
> +			   struct drm_buddy_block *block)
>   {
>   	block->header &= ~DRM_BUDDY_HEADER_STATE;
>   	block->header |= DRM_BUDDY_ALLOCATED;
>   
> -	list_del(&block->link);
> +	rbtree_remove(mm, block);
>   }
>   
>   static void mark_free(struct drm_buddy *mm,
> @@ -84,15 +115,16 @@ static void mark_free(struct drm_buddy *mm,
>   	block->header &= ~DRM_BUDDY_HEADER_STATE;
>   	block->header |= DRM_BUDDY_FREE;
>   
> -	list_insert_sorted(mm, block);
> +	rbtree_insert(mm, block);
>   }
>   
> -static void mark_split(struct drm_buddy_block *block)
> +static void mark_split(struct drm_buddy *mm,
> +		       struct drm_buddy_block *block)
>   {
>   	block->header &= ~DRM_BUDDY_HEADER_STATE;
>   	block->header |= DRM_BUDDY_SPLIT;
>   
> -	list_del(&block->link);
> +	rbtree_remove(mm, block);
>   }
>   
>   static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2)
> @@ -148,7 +180,7 @@ static unsigned int __drm_buddy_free(struct drm_buddy *mm,
>   				mark_cleared(parent);
>   		}
>   
> -		list_del(&buddy->link);
> +		rbtree_remove(mm, buddy);
>   		if (force_merge && drm_buddy_block_is_clear(buddy))
>   			mm->clear_avail -= drm_buddy_block_size(mm, buddy);
>   
> @@ -179,13 +211,19 @@ static int __force_merge(struct drm_buddy *mm,
>   		return -EINVAL;
>   
>   	for (i = min_order - 1; i >= 0; i--) {
> -		struct drm_buddy_block *block, *prev;
> +		struct rb_root *root = &mm->free_tree[i];
> +		struct rb_node *iter;
> +
> +		iter = rb_last(root);
>   
> -		list_for_each_entry_safe_reverse(block, prev, &mm->free_list[i], link) {
> -			struct drm_buddy_block *buddy;
> +		while (iter) {
> +			struct drm_buddy_block *block, *buddy;
>   			u64 block_start, block_end;
>   
> -			if (!block->parent)
> +			block = rbtree_get_free_block(iter);
> +			iter = rb_prev(iter);
> +
> +			if (!block || !block->parent)
>   				continue;
>   
>   			block_start = drm_buddy_block_offset(block);
> @@ -201,15 +239,10 @@ static int __force_merge(struct drm_buddy *mm,
>   			WARN_ON(drm_buddy_block_is_clear(block) ==
>   				drm_buddy_block_is_clear(buddy));
>   
> -			/*
> -			 * If the prev block is same as buddy, don't access the
> -			 * block in the next iteration as we would free the
> -			 * buddy block as part of the free function.
> -			 */
> -			if (prev == buddy)
> -				prev = list_prev_entry(prev, link);
> +			if (iter == &buddy->rb)
> +				iter = rb_prev(iter);
>   
> -			list_del(&block->link);
> +			rbtree_remove(mm, block);
>   			if (drm_buddy_block_is_clear(block))
>   				mm->clear_avail -= drm_buddy_block_size(mm, block);
>   
> @@ -258,14 +291,14 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
>   
>   	BUG_ON(mm->max_order > DRM_BUDDY_MAX_ORDER);
>   
> -	mm->free_list = kmalloc_array(mm->max_order + 1,
> -				      sizeof(struct list_head),
> +	mm->free_tree = kmalloc_array(mm->max_order + 1,
> +				      sizeof(struct rb_root),
>   				      GFP_KERNEL);
> -	if (!mm->free_list)
> +	if (!mm->free_tree)
>   		return -ENOMEM;
>   
>   	for (i = 0; i <= mm->max_order; ++i)
> -		INIT_LIST_HEAD(&mm->free_list[i]);
> +		mm->free_tree[i] = RB_ROOT;
>   
>   	mm->n_roots = hweight64(size);
>   
> @@ -273,7 +306,7 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
>   				  sizeof(struct drm_buddy_block *),
>   				  GFP_KERNEL);
>   	if (!mm->roots)
> -		goto out_free_list;
> +		goto out_free_tree;
>   
>   	offset = 0;
>   	i = 0;
> @@ -312,8 +345,8 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
>   	while (i--)
>   		drm_block_free(mm, mm->roots[i]);
>   	kfree(mm->roots);
> -out_free_list:
> -	kfree(mm->free_list);
> +out_free_tree:
> +	kfree(mm->free_tree);
>   	return -ENOMEM;
>   }
>   EXPORT_SYMBOL(drm_buddy_init);
> @@ -323,7 +356,7 @@ EXPORT_SYMBOL(drm_buddy_init);
>    *
>    * @mm: DRM buddy manager to free
>    *
> - * Cleanup memory manager resources and the freelist
> + * Cleanup memory manager resources and the freetree
>    */
>   void drm_buddy_fini(struct drm_buddy *mm)
>   {
> @@ -350,7 +383,7 @@ void drm_buddy_fini(struct drm_buddy *mm)
>   	WARN_ON(mm->avail != mm->size);
>   
>   	kfree(mm->roots);
> -	kfree(mm->free_list);
> +	kfree(mm->free_tree);
>   }
>   EXPORT_SYMBOL(drm_buddy_fini);
>   
> @@ -383,7 +416,7 @@ static int split_block(struct drm_buddy *mm,
>   		clear_reset(block);
>   	}
>   
> -	mark_split(block);
> +	mark_split(mm, block);
>   
>   	return 0;
>   }
> @@ -412,7 +445,7 @@ EXPORT_SYMBOL(drm_get_buddy);
>    * @is_clear: blocks clear state
>    *
>    * Reset the clear state based on @is_clear value for each block
> - * in the freelist.
> + * in the freetree.
>    */
>   void drm_buddy_reset_clear(struct drm_buddy *mm, bool is_clear)
>   {
> @@ -431,9 +464,9 @@ void drm_buddy_reset_clear(struct drm_buddy *mm, bool is_clear)
>   	}
>   
>   	for (i = 0; i <= mm->max_order; ++i) {
> -		struct drm_buddy_block *block;
> +		struct drm_buddy_block *block, *tmp;
>   
> -		list_for_each_entry_reverse(block, &mm->free_list[i], link) {
> +		rbtree_postorder_for_each_entry_safe(block, tmp, &mm->free_tree[i], rb) {
>   			if (is_clear != drm_buddy_block_is_clear(block)) {
>   				if (is_clear) {
>   					mark_cleared(block);
> @@ -639,14 +672,18 @@ get_maxblock(struct drm_buddy *mm, unsigned int order,
>   	unsigned int i;
>   
>   	for (i = order; i <= mm->max_order; ++i) {
> +		struct rb_node *iter = rb_last(&mm->free_tree[i]);
>   		struct drm_buddy_block *tmp_block;
>   
> -		list_for_each_entry_reverse(tmp_block, &mm->free_list[i], link) {
> -			if (block_incompatible(tmp_block, flags))
> -				continue;
> +		while (iter) {
> +			tmp_block = rbtree_get_free_block(iter);
>   
> -			block = tmp_block;
> -			break;
> +			if (!block_incompatible(tmp_block, flags)) {
> +				block = tmp_block;
> +				break;
> +			}
> +
> +			iter = rb_prev(iter);
>   		}
>   
>   		if (!block)
> @@ -667,7 +704,7 @@ get_maxblock(struct drm_buddy *mm, unsigned int order,
>   }
>   
>   static struct drm_buddy_block *
> -alloc_from_freelist(struct drm_buddy *mm,
> +alloc_from_freetree(struct drm_buddy *mm,
>   		    unsigned int order,
>   		    unsigned long flags)
>   {
> @@ -682,14 +719,18 @@ alloc_from_freelist(struct drm_buddy *mm,
>   			tmp = drm_buddy_block_order(block);
>   	} else {
>   		for (tmp = order; tmp <= mm->max_order; ++tmp) {
> +			struct rb_node *iter = rb_last(&mm->free_tree[tmp]);
>   			struct drm_buddy_block *tmp_block;
>   
> -			list_for_each_entry_reverse(tmp_block, &mm->free_list[tmp], link) {
> -				if (block_incompatible(tmp_block, flags))
> -					continue;
> +			while (iter) {
> +				tmp_block = rbtree_get_free_block(iter);
>   
> -				block = tmp_block;
> -				break;
> +				if (!block_incompatible(tmp_block, flags)) {
> +					block = tmp_block;
> +					break;
> +				}
> +
> +				iter = rb_prev(iter);
>   			}
>   
>   			if (block)
> @@ -700,10 +741,8 @@ alloc_from_freelist(struct drm_buddy *mm,
>   	if (!block) {
>   		/* Fallback method */
>   		for (tmp = order; tmp <= mm->max_order; ++tmp) {
> -			if (!list_empty(&mm->free_list[tmp])) {
> -				block = list_last_entry(&mm->free_list[tmp],
> -							struct drm_buddy_block,
> -							link);
> +			if (!rbtree_is_empty(mm, tmp)) {
> +				block = rbtree_last_entry(mm, tmp);
>   				if (block)
>   					break;
>   			}
> @@ -771,7 +810,7 @@ static int __alloc_range(struct drm_buddy *mm,
>   
>   		if (contains(start, end, block_start, block_end)) {
>   			if (drm_buddy_block_is_free(block)) {
> -				mark_allocated(block);
> +				mark_allocated(mm, block);
>   				total_allocated += drm_buddy_block_size(mm, block);
>   				mm->avail -= drm_buddy_block_size(mm, block);
>   				if (drm_buddy_block_is_clear(block))
> @@ -849,8 +888,8 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm,
>   {
>   	u64 rhs_offset, lhs_offset, lhs_size, filled;
>   	struct drm_buddy_block *block;
> -	struct list_head *list;
>   	LIST_HEAD(blocks_lhs);
> +	struct rb_node *iter;
>   	unsigned long pages;
>   	unsigned int order;
>   	u64 modify_size;
> @@ -862,11 +901,14 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm,
>   	if (order == 0)
>   		return -ENOSPC;
>   
> -	list = &mm->free_list[order];
> -	if (list_empty(list))
> +	if (rbtree_is_empty(mm, order))
>   		return -ENOSPC;
>   
> -	list_for_each_entry_reverse(block, list, link) {
> +	iter = rb_last(&mm->free_tree[order]);
> +
> +	while (iter) {
> +		block = rbtree_get_free_block(iter);
> +
>   		/* Allocate blocks traversing RHS */
>   		rhs_offset = drm_buddy_block_offset(block);
>   		err =  __drm_buddy_alloc_range(mm, rhs_offset, size,
> @@ -891,6 +933,8 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm,
>   		}
>   		/* Free blocks for the next iteration */
>   		drm_buddy_free_list_internal(mm, blocks);
> +
> +		iter = rb_prev(iter);
>   	}
>   
>   	return -ENOSPC;
> @@ -976,7 +1020,7 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
>   	list_add(&block->tmp_link, &dfs);
>   	err =  __alloc_range(mm, &dfs, new_start, new_size, blocks, NULL);
>   	if (err) {
> -		mark_allocated(block);
> +		mark_allocated(mm, block);
>   		mm->avail -= drm_buddy_block_size(mm, block);
>   		if (drm_buddy_block_is_clear(block))
>   			mm->clear_avail -= drm_buddy_block_size(mm, block);
> @@ -999,8 +1043,8 @@ __drm_buddy_alloc_blocks(struct drm_buddy *mm,
>   		return  __drm_buddy_alloc_range_bias(mm, start, end,
>   						     order, flags);
>   	else
> -		/* Allocate from freelist */
> -		return alloc_from_freelist(mm, order, flags);
> +		/* Allocate from freetree */
> +		return alloc_from_freetree(mm, order, flags);
>   }
>   
>   /**
> @@ -1017,8 +1061,8 @@ __drm_buddy_alloc_blocks(struct drm_buddy *mm,
>    * alloc_range_bias() called on range limitations, which traverses
>    * the tree and returns the desired block.
>    *
> - * alloc_from_freelist() called when *no* range restrictions
> - * are enforced, which picks the block from the freelist.
> + * alloc_from_freetree() called when *no* range restrictions
> + * are enforced, which picks the block from the freetree.
>    *
>    * Returns:
>    * 0 on success, error code on failure.
> @@ -1120,7 +1164,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>   			}
>   		} while (1);
>   
> -		mark_allocated(block);
> +		mark_allocated(mm, block);
>   		mm->avail -= drm_buddy_block_size(mm, block);
>   		if (drm_buddy_block_is_clear(block))
>   			mm->clear_avail -= drm_buddy_block_size(mm, block);
> @@ -1201,10 +1245,10 @@ void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p)
>   		   mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20, mm->clear_avail >> 20);
>   
>   	for (order = mm->max_order; order >= 0; order--) {
> -		struct drm_buddy_block *block;
> +		struct drm_buddy_block *block, *tmp;
>   		u64 count = 0, free;
>   
> -		list_for_each_entry(block, &mm->free_list[order], link) {
> +		rbtree_postorder_for_each_entry_safe(block, tmp, &mm->free_tree[order], rb) {
>   			BUG_ON(!drm_buddy_block_is_free(block));
>   			count++;
>   		}
> diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
> index 513837632b7d..9ee105d4309f 100644
> --- a/include/drm/drm_buddy.h
> +++ b/include/drm/drm_buddy.h
> @@ -10,6 +10,7 @@
>   #include <linux/list.h>
>   #include <linux/slab.h>
>   #include <linux/sched.h>
> +#include <linux/rbtree.h>
>   
>   #include <drm/drm_print.h>
>   
> @@ -53,7 +54,11 @@ struct drm_buddy_block {
>   	 * a list, if so desired. As soon as the block is freed with
>   	 * drm_buddy_free* ownership is given back to the mm.
>   	 */
> -	struct list_head link;
> +	union {
> +		struct rb_node rb;
> +		struct list_head link;
> +	};
> +
>   	struct list_head tmp_link;
>   };
>   
> @@ -68,7 +73,7 @@ struct drm_buddy_block {
>    */
>   struct drm_buddy {
>   	/* Maintain a free list for each order. */
> -	struct list_head *free_list;
> +	struct rb_root *free_tree;
>   
>   	/*
>   	 * Maintain explicit binary tree(s) to track the allocation of the
> @@ -94,7 +99,7 @@ struct drm_buddy {
>   };
>   
>   static inline u64
> -drm_buddy_block_offset(struct drm_buddy_block *block)
> +drm_buddy_block_offset(const struct drm_buddy_block *block)
>   {
>   	return block->header & DRM_BUDDY_HEADER_OFFSET;
>   }
>
> base-commit: 3a9cf301794c1a49d95eeb13119ff490fb5cfe88


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v7 3/3] drm/buddy: Add KUnit tests for allocator performance under fragmentation
  2025-09-23  9:02 ` [PATCH v7 3/3] drm/buddy: Add KUnit tests for allocator performance under fragmentation Arunpravin Paneer Selvam
@ 2025-09-26 11:00   ` Matthew Auld
  2025-10-01  3:59     ` Arunpravin Paneer Selvam
  0 siblings, 1 reply; 11+ messages in thread
From: Matthew Auld @ 2025-09-26 11:00 UTC (permalink / raw)
  To: Arunpravin Paneer Selvam, christian.koenig, dri-devel, amd-gfx,
	intel-gfx, intel-xe
  Cc: alexander.deucher, jani.nikula, peterz, samuel.pitoiset

On 23/09/2025 10:02, Arunpravin Paneer Selvam wrote:
> Add KUnit test cases that create severe memory fragmentation and
> measure allocation/free performance.
> 
> The tests simulate two scenarios -
> 
> 1. Allocation under severe fragmentation
>     - Allocate the entire 4 GiB space as 8 KiB blocks with 64 KiB alignment,
>       split them into two groups and free with mixed flags to block coalescing.
>     - Repeatedly allocate and free 64 KiB blocks while timing the loop.
>     - Freelist runtime: 76475 ms (76.5 seconds), soft-lockup triggered.
>       RB-tree runtime: 186 ms.
> 
> 2. Reverse free order under fragmentation
>     - Create a similarly fragmented space, free half the blocks, reverse
>       the order of the remainder, and release them with the cleared flag.
>     - Freelist runtime: 85620 ms (85.6 seconds).
>       RB-tree runtime: 114 ms.
> 
> Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
> ---
>   drivers/gpu/drm/tests/drm_buddy_test.c | 110 +++++++++++++++++++++++++
>   1 file changed, 110 insertions(+)
> 
> diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c b/drivers/gpu/drm/tests/drm_buddy_test.c
> index 7a0e523651f0..19b49fb6ec19 100644
> --- a/drivers/gpu/drm/tests/drm_buddy_test.c
> +++ b/drivers/gpu/drm/tests/drm_buddy_test.c
> @@ -21,6 +21,115 @@ static inline u64 get_size(int order, u64 chunk_size)
>   	return (1 << order) * chunk_size;
>   }
>   
> +static void drm_test_buddy_fragmentation_performance(struct kunit *test)
> +{
> +	const unsigned long max_acceptable_time_ms = 1000;
> +	struct drm_buddy_block *block, *tmp;
> +	int num_blocks, i, ret, count = 0;
> +	LIST_HEAD(allocated_blocks);
> +	unsigned long elapsed_ms;
> +	LIST_HEAD(reverse_list);
> +	LIST_HEAD(test_blocks);
> +	LIST_HEAD(clear_list);
> +	LIST_HEAD(dirty_list);
> +	LIST_HEAD(free_list);
> +	struct drm_buddy mm;
> +	u64 mm_size = SZ_4G;
> +	ktime_t start, end;
> +
> +	/*
> +	 * Allocation under severe fragmentation
> +	 *
> +	 * Create severe fragmentation by allocating the entire 4 GiB address space
> +	 * as tiny 8 KiB blocks but forcing a 64 KiB alignment. The resulting pattern
> +	 * leaves many scattered holes. Split the allocations into two groups and
> +	 * return them with different flags to block coalescing, then repeatedly
> +	 * allocate and free 64 KiB blocks while timing the loop. This stresses how
> +	 * quickly the allocator can satisfy larger, aligned requests from a pool of
> +	 * highly fragmented space.
> +	 */
> +	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, SZ_4K),
> +			       "buddy_init failed\n");
> +
> +	num_blocks = mm_size / SZ_64K;
> +
> +	start = ktime_get();
> +	/* Allocate with maximum fragmentation - 8K blocks with 64K alignment */
> +	for (i = 0; i < num_blocks; i++)
> +		KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, SZ_8K, SZ_64K,
> +								    &allocated_blocks, 0),
> +					"buddy_alloc hit an error size=%u\n", SZ_8K);
> +
> +	list_for_each_entry_safe(block, tmp, &allocated_blocks, link) {
> +		if (count % 4 == 0 || count % 4 == 3)
> +			list_move_tail(&block->link, &clear_list);
> +		else
> +			list_move_tail(&block->link, &dirty_list);
> +		count++;
> +	}
> +
> +	/* Free with different flags to ensure no coalescing */
> +	drm_buddy_free_list(&mm, &clear_list, DRM_BUDDY_CLEARED);
> +	drm_buddy_free_list(&mm, &dirty_list, 0);
> +
> +	for (i = 0; i < num_blocks; i++)
> +		KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, SZ_64K, SZ_64K,
> +								    &test_blocks, 0),
> +					"buddy_alloc hit an error size=%u\n", SZ_64K);
> +	drm_buddy_free_list(&mm, &test_blocks, 0);
> +
> +	end = ktime_get();
> +	elapsed_ms = ktime_to_ms(ktime_sub(end, start));
> +	/* Performance validation */
> +	KUNIT_EXPECT_LT_MSG(test, elapsed_ms, max_acceptable_time_ms,
> +			    "Fragmented allocation took %lu ms (max acceptable: %lu ms)",
> +			    elapsed_ms, max_acceptable_time_ms);
> +	drm_buddy_fini(&mm);
> +
> +	/*
> +	 * Reverse free order under fragmentation
> +	 *
> +	 * Construct a fragmented 4 GiB space by allocating every 8 KiB block with
> +	 * 64 KiB alignment, creating a dense scatter of small regions. Half of the
> +	 * blocks are selectively freed to form sparse gaps, while the remaining
> +	 * allocations are preserved, reordered in reverse, and released back with
> +	 * the cleared flag. This models a pathological reverse-ordered free pattern
> +	 * and measures how quickly the allocator can merge and reclaim space when
> +	 * deallocation occurs in the opposite order of allocation, exposing the
> +	 * cost difference between a linear freelist scan and an ordered tree lookup.
> +	 */
> +	ret = drm_buddy_init(&mm, mm_size, SZ_4K);
> +	KUNIT_ASSERT_EQ(test, ret, 0);
> +
> +	start = ktime_get();
> +	/* Allocate maximum fragmentation */
> +	for (i = 0; i < num_blocks; i++)
> +		KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, SZ_8K, SZ_64K,
> +								    &allocated_blocks, 0),
> +					"buddy_alloc hit an error size=%u\n", SZ_8K);
> +
> +	list_for_each_entry_safe(block, tmp, &allocated_blocks, link) {
> +		if (count % 2 == 0)
> +			list_move_tail(&block->link, &free_list);
> +		count++;
> +	}
> +	drm_buddy_free_list(&mm, &free_list, DRM_BUDDY_CLEARED);
> +
> +	list_for_each_entry_safe_reverse(block, tmp, &allocated_blocks, link)
> +		list_move(&block->link, &reverse_list);
> +	drm_buddy_free_list(&mm, &reverse_list, DRM_BUDDY_CLEARED);
> +
> +	end = ktime_get();
> +	elapsed_ms = ktime_to_ms(ktime_sub(end, start));
> +
> +	/* Performance validation */
> +	KUNIT_EXPECT_LT_MSG(test, elapsed_ms, max_acceptable_time_ms,
> +			    "Reverse-ordered free took %lu ms (max acceptable: %lu ms)",
> +			    elapsed_ms, max_acceptable_time_ms);

Sorry for the delay. We are pretty sure these time asserts are not going 
to be flaky over many thousands of runs across different types of 
machines (maybe some underpowered atom)?

Assuming not a concern,
Reviewed-by: Matthew Auld <matthew.auld@intel.com>

> +
> +	drm_buddy_fini(&mm);
> +}
> +
>   static void drm_test_buddy_alloc_range_bias(struct kunit *test)
>   {
>   	u32 mm_size, size, ps, bias_size, bias_start, bias_end, bias_rem;
> @@ -772,6 +881,7 @@ static struct kunit_case drm_buddy_tests[] = {
>   	KUNIT_CASE(drm_test_buddy_alloc_contiguous),
>   	KUNIT_CASE(drm_test_buddy_alloc_clear),
>   	KUNIT_CASE(drm_test_buddy_alloc_range_bias),
> +	KUNIT_CASE(drm_test_buddy_fragmentation_performance),
>   	{}
>   };
>   


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v7 1/3] drm/buddy: Optimize free block management with RB tree
  2025-09-23  9:02 [PATCH v7 1/3] drm/buddy: Optimize free block management with RB tree Arunpravin Paneer Selvam
                   ` (4 preceding siblings ...)
  2025-09-26  5:41 ` [PATCH v7 1/3] " Arunpravin Paneer Selvam
@ 2025-09-26 16:05 ` Matthew Auld
  5 siblings, 0 replies; 11+ messages in thread
From: Matthew Auld @ 2025-09-26 16:05 UTC (permalink / raw)
  To: Arunpravin Paneer Selvam, christian.koenig, dri-devel, amd-gfx,
	intel-gfx, intel-xe
  Cc: alexander.deucher, jani.nikula, peterz, samuel.pitoiset, stable

On 23/09/2025 10:02, Arunpravin Paneer Selvam wrote:
> Replace the freelist (O(n)) used for free block management with a
> red-black tree, providing more efficient O(log n) search, insert,
> and delete operations. This improves scalability and performance
> when managing large numbers of free blocks per order (e.g., hundreds
> or thousands).
> 
> In the VK-CTS memory stress subtest, the buddy manager merges
> fragmented memory and inserts freed blocks into the freelist. Since
> freelist insertion is O(n), this becomes a bottleneck as fragmentation
> increases. Benchmarking shows list_insert_sorted() consumes ~52.69% CPU
> with the freelist, compared to just 0.03% with the RB tree
> (rbtree_insert.isra.0), despite performing the same sorted insert.
> 
> This also improves performance in heavily fragmented workloads,
> such as games or graphics tests that stress memory.
> 
> As the buddy allocator evolves with new features such as clear-page
> tracking, the resulting fragmentation and complexity have grown.
> These RB-tree based design changes are introduced to address that
> growth and ensure the allocator continues to perform efficiently
> under fragmented conditions.
> 
> The RB tree implementation with separate clear/dirty trees provides:
> - O(n log n) aggregate complexity for all operations instead of O(n^2)
> - Elimination of soft lockups and system instability
> - Improved code maintainability and clarity
> - Better scalability for large memory systems
> - Predictable performance under fragmentation
> 
> v3(Matthew):
>    - Remove RB_EMPTY_NODE check in force_merge function.
>    - Rename rb for loop macros to have less generic names and move to
>      .c file.
>    - Make the rb node rb and link field as union.
> 
> v4(Jani Nikula):
>    - The kernel-doc comment should be "/**"
>    - Move all the rbtree macros to rbtree.h and add parens to ensure
>      correct precedence.
> 
> v5:
>    - Remove the inline in a .c file (Jani Nikula).
> 
> v6(Peter Zijlstra):
>    - Add rb_add() function replacing the existing rbtree_insert() code.
> 
> v7:
>    - A full walk iteration in rbtree is slower than the list (Peter Zijlstra).
>    - The existing rbtree_postorder_for_each_entry_safe macro should be used
>      in scenarios where traversal order is not a critical factor (Christian).
> 
> Cc: stable@vger.kernel.org
> Fixes: a68c7eaa7a8f ("drm/amdgpu: Enable clear page functionality")
> Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>

Reviewed-by: Matthew Auld <matthew.auld@intel.com>


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v7 2/3] drm/buddy: Separate clear and dirty free block trees
  2025-09-23  9:02 ` [PATCH v7 2/3] drm/buddy: Separate clear and dirty free block trees Arunpravin Paneer Selvam
@ 2025-09-26 17:08   ` Matthew Auld
  2025-10-04  8:30     ` Arunpravin Paneer Selvam
  0 siblings, 1 reply; 11+ messages in thread
From: Matthew Auld @ 2025-09-26 17:08 UTC (permalink / raw)
  To: Arunpravin Paneer Selvam, christian.koenig, dri-devel, amd-gfx,
	intel-gfx, intel-xe
  Cc: alexander.deucher, jani.nikula, peterz, samuel.pitoiset, stable

On 23/09/2025 10:02, Arunpravin Paneer Selvam wrote:
> Maintain two separate RB trees per order - one for clear (zeroed) blocks
> and another for dirty (uncleared) blocks. This separation improves
> code clarity and makes it more obvious which tree is being searched
> during allocation. It also improves scalability and efficiency when
> searching for a specific type of block, avoiding unnecessary checks
> and making the allocator more predictable under fragmentation.
> 
> The changes have been validated using the existing drm_buddy_test
> KUnit test cases, along with selected graphics workloads,
> to ensure correctness and avoid regressions.
> 
> v2: Missed adding the suggested-by tag. Added it in v2.
> 
> v3(Matthew):
>    - Remove the double underscores from the internal functions.
>    - Rename the internal functions to have less generic names.
>    - Fix the error handling code.
>    - Pass tree argument for the tree macro.
>    - Use the existing dirty/free bit instead of new tree field.
>    - Make free_trees[] instead of clear_tree and dirty_tree for
>      more cleaner approach.
> 
> v4:
>    - A bug was reported by Intel CI and it is fixed by
>      Matthew Auld.
>    - Replace the get_root function with
>      &mm->free_trees[tree][order] (Matthew)
>    - Remove the unnecessary rbtree_is_empty() check (Matthew)
>    - Remove the unnecessary get_tree_for_flags() function.
>    - Rename get_tree_for_block() name with get_block_tree() for more
>      clarity.
> 
> v5(Jani Nikula):
>    - Don't use static inline in .c files.
>    - enum free_tree and enumerator names are quite generic for a header
>      and usage and the whole enum should be an implementation detail.
> 
> v6:
>    - Rewrite the __force_merge() function using the rb_last() and rb_prev().
> 
> Cc: stable@vger.kernel.org
> Fixes: a68c7eaa7a8f ("drm/amdgpu: Enable clear page functionality")
> Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
> Suggested-by: Matthew Auld <matthew.auld@intel.com>
> Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4260
> ---
>   drivers/gpu/drm/drm_buddy.c | 321 ++++++++++++++++++++----------------
>   include/drm/drm_buddy.h     |   2 +-
>   2 files changed, 182 insertions(+), 141 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
> index 67aa67229cc3..6e12a0b5d5e3 100644
> --- a/drivers/gpu/drm/drm_buddy.c
> +++ b/drivers/gpu/drm/drm_buddy.c
> @@ -12,9 +12,16 @@
>   
>   #include <drm/drm_buddy.h>
>   
> +enum drm_buddy_free_tree {
> +	DRM_BUDDY_CLEAR_TREE = 0,
> +	DRM_BUDDY_DIRTY_TREE,
> +	DRM_BUDDY_MAX_FREE_TREES,
> +};
> +
>   static struct kmem_cache *slab_blocks;
>   
> -#define rbtree_get_free_block(node) rb_entry((node), struct drm_buddy_block, rb)
> +#define for_each_free_tree(tree) \
> +	for ((tree) = 0; (tree) < DRM_BUDDY_MAX_FREE_TREES; (tree)++)

Some places seem to open code this? Maybe just drop this or use it 
everywhere?

>   
>   static struct drm_buddy_block *drm_block_alloc(struct drm_buddy *mm,
>   					       struct drm_buddy_block *parent,
> @@ -45,6 +52,30 @@ static void drm_block_free(struct drm_buddy *mm,
>   	kmem_cache_free(slab_blocks, block);
>   }
>   
> +static enum drm_buddy_free_tree
> +get_block_tree(struct drm_buddy_block *block)
> +{
> +	return drm_buddy_block_is_clear(block) ?
> +	       DRM_BUDDY_CLEAR_TREE : DRM_BUDDY_DIRTY_TREE;
> +}
> +
> +static struct drm_buddy_block *
> +rbtree_get_free_block(const struct rb_node *node)
> +{
> +	return node ? rb_entry(node, struct drm_buddy_block, rb) : NULL;
> +}
> +
> +static struct drm_buddy_block *
> +rbtree_last_free_block(struct rb_root *root)
> +{
> +	return rbtree_get_free_block(rb_last(root));
> +}
> +
> +static bool rbtree_is_empty(struct rb_root *root)
> +{
> +	return RB_EMPTY_ROOT(root);
> +}
> +
>   static bool drm_buddy_block_offset_less(const struct drm_buddy_block *block,
>   					const struct drm_buddy_block *node)
>   {
> @@ -59,37 +90,28 @@ static bool rbtree_block_offset_less(struct rb_node *block,
>   }
>   
>   static void rbtree_insert(struct drm_buddy *mm,
> -			  struct drm_buddy_block *block)
> +			  struct drm_buddy_block *block,
> +			  enum drm_buddy_free_tree tree)
>   {
>   	rb_add(&block->rb,
> -	       &mm->free_tree[drm_buddy_block_order(block)],
> +	       &mm->free_trees[tree][drm_buddy_block_order(block)],
>   	       rbtree_block_offset_less);
>   }
>   
>   static void rbtree_remove(struct drm_buddy *mm,
>   			  struct drm_buddy_block *block)
>   {
> +	unsigned int order = drm_buddy_block_order(block);
>   	struct rb_root *root;
> +	enum drm_buddy_free_tree tree;
>   
> -	root = &mm->free_tree[drm_buddy_block_order(block)];
> -	rb_erase(&block->rb, root);
> +	tree = get_block_tree(block);
> +	root = &mm->free_trees[tree][order];
>   
> +	rb_erase(&block->rb, root);
>   	RB_CLEAR_NODE(&block->rb);
>   }
>   
> -static struct drm_buddy_block *
> -rbtree_last_entry(struct drm_buddy *mm, unsigned int order)
> -{
> -	struct rb_node *node = rb_last(&mm->free_tree[order]);
> -
> -	return node ? rb_entry(node, struct drm_buddy_block, rb) : NULL;
> -}
> -
> -static bool rbtree_is_empty(struct drm_buddy *mm, unsigned int order)
> -{
> -	return RB_EMPTY_ROOT(&mm->free_tree[order]);
> -}
> -
>   static void clear_reset(struct drm_buddy_block *block)
>   {
>   	block->header &= ~DRM_BUDDY_HEADER_CLEAR;
> @@ -112,10 +134,13 @@ static void mark_allocated(struct drm_buddy *mm,
>   static void mark_free(struct drm_buddy *mm,
>   		      struct drm_buddy_block *block)
>   {
> +	enum drm_buddy_free_tree tree;
> +
>   	block->header &= ~DRM_BUDDY_HEADER_STATE;
>   	block->header |= DRM_BUDDY_FREE;
>   
> -	rbtree_insert(mm, block);
> +	tree = get_block_tree(block);
> +	rbtree_insert(mm, block, tree);
>   }
>   
>   static void mark_split(struct drm_buddy *mm,
> @@ -201,6 +226,7 @@ static int __force_merge(struct drm_buddy *mm,
>   			 u64 end,
>   			 unsigned int min_order)
>   {
> +	enum drm_buddy_free_tree tree;
>   	unsigned int order;
>   	int i;
>   
> @@ -210,45 +236,48 @@ static int __force_merge(struct drm_buddy *mm,
>   	if (min_order > mm->max_order)
>   		return -EINVAL;
>   
> -	for (i = min_order - 1; i >= 0; i--) {
> -		struct rb_root *root = &mm->free_tree[i];
> -		struct rb_node *iter;
> +	for_each_free_tree(tree) {
> +		for (i = min_order - 1; i >= 0; i--) {
> +			struct rb_node *iter = rb_last(&mm->free_trees[tree][i]);
>   
> -		iter = rb_last(root);
> -
> -		while (iter) {
> -			struct drm_buddy_block *block, *buddy;
> -			u64 block_start, block_end;
> +			while (iter) {
> +				struct drm_buddy_block *block, *buddy;
> +				u64 block_start, block_end;
>   
> -			block = rbtree_get_free_block(iter);
> -			iter = rb_prev(iter);
> +				block = rbtree_get_free_block(iter);
> +				iter = rb_prev(iter);
>   
> -			if (!block || !block->parent)
> -				continue;
> +				if (!block || !block->parent)
> +					continue;
>   
> -			block_start = drm_buddy_block_offset(block);
> -			block_end = block_start + drm_buddy_block_size(mm, block) - 1;
> +				block_start = drm_buddy_block_offset(block);
> +				block_end = block_start + drm_buddy_block_size(mm, block) - 1;
>   
> -			if (!contains(start, end, block_start, block_end))
> -				continue;
> +				if (!contains(start, end, block_start, block_end))
> +					continue;
>   
> -			buddy = __get_buddy(block);
> -			if (!drm_buddy_block_is_free(buddy))
> -				continue;
> +				buddy = __get_buddy(block);
> +				if (!drm_buddy_block_is_free(buddy))
> +					continue;
>   
> -			WARN_ON(drm_buddy_block_is_clear(block) ==
> -				drm_buddy_block_is_clear(buddy));
> +				WARN_ON(drm_buddy_block_is_clear(block) ==
> +					drm_buddy_block_is_clear(buddy));
>   
> -			if (iter == &buddy->rb)
> -				iter = rb_prev(iter);
> +				/*
> +				 * Advance to the next node when the current node is the buddy,
> +				 * as freeing the block will also remove its buddy from the tree.
> +				 */
> +				if (iter == &buddy->rb)
> +					iter = rb_prev(iter);
>   
> -			rbtree_remove(mm, block);
> -			if (drm_buddy_block_is_clear(block))
> -				mm->clear_avail -= drm_buddy_block_size(mm, block);
> +				rbtree_remove(mm, block);
> +				if (drm_buddy_block_is_clear(block))
> +					mm->clear_avail -= drm_buddy_block_size(mm, block);
>   
> -			order = __drm_buddy_free(mm, block, true);
> -			if (order >= min_order)
> -				return 0;
> +				order = __drm_buddy_free(mm, block, true);
> +				if (order >= min_order)
> +					return 0;
> +			}

Do we need all these changes in __force_merge? Can't we just always pick 
the dirty tree and keep everything else the same? If something is 
non-merged there must be a dirty block preventing that, and when force 
merging everything unmerged will be re-labled as dirty anyway, so the 
second loop through the clean tree should always yield nothing?

>   		}
>   	}
>   
> @@ -269,7 +298,7 @@ static int __force_merge(struct drm_buddy *mm,
>    */
>   int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
>   {
> -	unsigned int i;
> +	unsigned int i, j;
>   	u64 offset;
>   
>   	if (size < chunk_size)
> @@ -291,14 +320,22 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
>   
>   	BUG_ON(mm->max_order > DRM_BUDDY_MAX_ORDER);
>   
> -	mm->free_tree = kmalloc_array(mm->max_order + 1,
> -				      sizeof(struct rb_root),
> -				      GFP_KERNEL);
> -	if (!mm->free_tree)
> +	mm->free_trees = kmalloc_array(DRM_BUDDY_MAX_FREE_TREES,
> +				       sizeof(*mm->free_trees),
> +				       GFP_KERNEL);
> +	if (!mm->free_trees)
>   		return -ENOMEM;
>   
> -	for (i = 0; i <= mm->max_order; ++i)
> -		mm->free_tree[i] = RB_ROOT;
> +	for (i = 0; i < DRM_BUDDY_MAX_FREE_TREES; i++) {
> +		mm->free_trees[i] = kmalloc_array(mm->max_order + 1,
> +						  sizeof(struct rb_root),
> +						  GFP_KERNEL);
> +		if (!mm->free_trees[i])
> +			goto out_free_tree;
> +
> +		for (j = 0; j <= mm->max_order; ++j)
> +			mm->free_trees[i][j] = RB_ROOT;
> +	}
>   
>   	mm->n_roots = hweight64(size);
>   
> @@ -346,7 +383,9 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
>   		drm_block_free(mm, mm->roots[i]);
>   	kfree(mm->roots);
>   out_free_tree:
> -	kfree(mm->free_tree);
> +	while (i--)
> +		kfree(mm->free_trees[i]);

out_free_roots is also decrementing 'i' here it seems, so looks like 
this might blow up?

> +	kfree(mm->free_trees);
>   	return -ENOMEM;
>   }
>   EXPORT_SYMBOL(drm_buddy_init);
> @@ -382,8 +421,9 @@ void drm_buddy_fini(struct drm_buddy *mm)
>   
>   	WARN_ON(mm->avail != mm->size);
>   
> +	for (i = 0; i < DRM_BUDDY_MAX_FREE_TREES; i++)
> +		kfree(mm->free_trees[i]);
>   	kfree(mm->roots);
> -	kfree(mm->free_tree);
>   }
>   EXPORT_SYMBOL(drm_buddy_fini);
>   
> @@ -407,8 +447,7 @@ static int split_block(struct drm_buddy *mm,
>   		return -ENOMEM;
>   	}
>   
> -	mark_free(mm, block->left);
> -	mark_free(mm, block->right);
> +	mark_split(mm, block);
>   
>   	if (drm_buddy_block_is_clear(block)) {
>   		mark_cleared(block->left);
> @@ -416,7 +455,8 @@ static int split_block(struct drm_buddy *mm,
>   		clear_reset(block);
>   	}
>   
> -	mark_split(mm, block);
> +	mark_free(mm, block->left);
> +	mark_free(mm, block->right);
>   
>   	return 0;
>   }
> @@ -449,6 +489,7 @@ EXPORT_SYMBOL(drm_get_buddy);
>    */
>   void drm_buddy_reset_clear(struct drm_buddy *mm, bool is_clear)
>   {
> +	enum drm_buddy_free_tree src_tree, dst_tree;
>   	u64 root_size, size, start;
>   	unsigned int order;
>   	int i;
> @@ -463,19 +504,24 @@ void drm_buddy_reset_clear(struct drm_buddy *mm, bool is_clear)
>   		size -= root_size;
>   	}
>   
> +	src_tree = is_clear ? DRM_BUDDY_DIRTY_TREE : DRM_BUDDY_CLEAR_TREE;
> +	dst_tree = is_clear ? DRM_BUDDY_CLEAR_TREE : DRM_BUDDY_DIRTY_TREE;
> +
>   	for (i = 0; i <= mm->max_order; ++i) {
> +		struct rb_root *root = &mm->free_trees[src_tree][i];
>   		struct drm_buddy_block *block, *tmp;
>   
> -		rbtree_postorder_for_each_entry_safe(block, tmp, &mm->free_tree[i], rb) {
> -			if (is_clear != drm_buddy_block_is_clear(block)) {
> -				if (is_clear) {
> -					mark_cleared(block);
> -					mm->clear_avail += drm_buddy_block_size(mm, block);
> -				} else {
> -					clear_reset(block);
> -					mm->clear_avail -= drm_buddy_block_size(mm, block);
> -				}
> +		rbtree_postorder_for_each_entry_safe(block, tmp, root, rb) {
> +			rbtree_remove(mm, block);
> +			if (is_clear) {
> +				mark_cleared(block);
> +				mm->clear_avail += drm_buddy_block_size(mm, block);
> +			} else {
> +				clear_reset(block);
> +				mm->clear_avail -= drm_buddy_block_size(mm, block);
>   			}
> +
> +			rbtree_insert(mm, block, dst_tree);
>   		}
>   	}
>   }
> @@ -665,27 +711,17 @@ __drm_buddy_alloc_range_bias(struct drm_buddy *mm,
>   }
>   
>   static struct drm_buddy_block *
> -get_maxblock(struct drm_buddy *mm, unsigned int order,
> -	     unsigned long flags)
> +get_maxblock(struct drm_buddy *mm,
> +	     unsigned int order,
> +	     enum drm_buddy_free_tree tree)
>   {
>   	struct drm_buddy_block *max_block = NULL, *block = NULL;
> +	struct rb_root *root;
>   	unsigned int i;
>   
>   	for (i = order; i <= mm->max_order; ++i) {
> -		struct rb_node *iter = rb_last(&mm->free_tree[i]);
> -		struct drm_buddy_block *tmp_block;
> -
> -		while (iter) {
> -			tmp_block = rbtree_get_free_block(iter);
> -
> -			if (!block_incompatible(tmp_block, flags)) {
> -				block = tmp_block;
> -				break;
> -			}
> -
> -			iter = rb_prev(iter);
> -		}
> -
> +		root = &mm->free_trees[tree][i];
> +		block = rbtree_last_free_block(root);
>   		if (!block)
>   			continue;
>   
> @@ -709,43 +745,39 @@ alloc_from_freetree(struct drm_buddy *mm,
>   		    unsigned long flags)
>   {
>   	struct drm_buddy_block *block = NULL;
> +	struct rb_root *root;
> +	enum drm_buddy_free_tree tree;
>   	unsigned int tmp;
>   	int err;
>   
> +	tree = (flags & DRM_BUDDY_CLEAR_ALLOCATION) ?
> +		DRM_BUDDY_CLEAR_TREE : DRM_BUDDY_DIRTY_TREE;
> +
>   	if (flags & DRM_BUDDY_TOPDOWN_ALLOCATION) {
> -		block = get_maxblock(mm, order, flags);
> +		block = get_maxblock(mm, order, tree);
>   		if (block)
>   			/* Store the obtained block order */
>   			tmp = drm_buddy_block_order(block);
>   	} else {
>   		for (tmp = order; tmp <= mm->max_order; ++tmp) {
> -			struct rb_node *iter = rb_last(&mm->free_tree[tmp]);
> -			struct drm_buddy_block *tmp_block;
> -
> -			while (iter) {
> -				tmp_block = rbtree_get_free_block(iter);
> -
> -				if (!block_incompatible(tmp_block, flags)) {
> -					block = tmp_block;
> -					break;
> -				}
> -
> -				iter = rb_prev(iter);
> -			}
> -
> +			/* Get RB tree root for this order and tree */
> +			root = &mm->free_trees[tree][tmp];
> +			block = rbtree_last_free_block(root);
>   			if (block)
>   				break;
>   		}
>   	}
>   
>   	if (!block) {
> -		/* Fallback method */
> +		/* Try allocating from the other tree */
> +		tree = (tree == DRM_BUDDY_CLEAR_TREE) ?
> +			DRM_BUDDY_DIRTY_TREE : DRM_BUDDY_CLEAR_TREE;
> +
>   		for (tmp = order; tmp <= mm->max_order; ++tmp) {
> -			if (!rbtree_is_empty(mm, tmp)) {

Did you mean to drop the is_empty() check here? If it's overkill I think 
better just not add it in the previous patch?

> -				block = rbtree_last_entry(mm, tmp);
> -				if (block)
> -					break;
> -			}
> +			root = &mm->free_trees[tree][tmp];
> +			block = rbtree_last_free_block(root);
> +			if (block)
> +				break;
>   		}
>   
>   		if (!block)
> @@ -887,9 +919,9 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm,
>   				     struct list_head *blocks)
>   {
>   	u64 rhs_offset, lhs_offset, lhs_size, filled;
> +	enum drm_buddy_free_tree tree;
>   	struct drm_buddy_block *block;
>   	LIST_HEAD(blocks_lhs);
> -	struct rb_node *iter;
>   	unsigned long pages;
>   	unsigned int order;
>   	u64 modify_size;
> @@ -901,40 +933,43 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm,
>   	if (order == 0)
>   		return -ENOSPC;
>   
> -	if (rbtree_is_empty(mm, order))
> +	if (rbtree_is_empty(&mm->free_trees[DRM_BUDDY_CLEAR_TREE][order]) &&
> +	    rbtree_is_empty(&mm->free_trees[DRM_BUDDY_DIRTY_TREE][order]))

Could potentially merge this with the for_each_tree loop below?

Otherwise,
Reviewed-by: Matthew Auld <matthew.auld@intel.com>

>   		return -ENOSPC;
>   
> -	iter = rb_last(&mm->free_tree[order]);
> -
> -	while (iter) {
> -		block = rbtree_get_free_block(iter);
> -
> -		/* Allocate blocks traversing RHS */
> -		rhs_offset = drm_buddy_block_offset(block);
> -		err =  __drm_buddy_alloc_range(mm, rhs_offset, size,
> -					       &filled, blocks);
> -		if (!err || err != -ENOSPC)
> -			return err;
> -
> -		lhs_size = max((size - filled), min_block_size);
> -		if (!IS_ALIGNED(lhs_size, min_block_size))
> -			lhs_size = round_up(lhs_size, min_block_size);
> -
> -		/* Allocate blocks traversing LHS */
> -		lhs_offset = drm_buddy_block_offset(block) - lhs_size;
> -		err =  __drm_buddy_alloc_range(mm, lhs_offset, lhs_size,
> -					       NULL, &blocks_lhs);
> -		if (!err) {
> -			list_splice(&blocks_lhs, blocks);
> -			return 0;
> -		} else if (err != -ENOSPC) {
> +	for_each_free_tree(tree) {
> +		struct rb_node *iter = rb_last(&mm->free_trees[tree][order]);
> +
> +		while (iter) {
> +			block = rbtree_get_free_block(iter);
> +
> +			/* Allocate blocks traversing RHS */
> +			rhs_offset = drm_buddy_block_offset(block);
> +			err =  __drm_buddy_alloc_range(mm, rhs_offset, size,
> +						       &filled, blocks);
> +			if (!err || err != -ENOSPC)
> +				return err;
> +
> +			lhs_size = max((size - filled), min_block_size);
> +			if (!IS_ALIGNED(lhs_size, min_block_size))
> +				lhs_size = round_up(lhs_size, min_block_size);
> +
> +			/* Allocate blocks traversing LHS */
> +			lhs_offset = drm_buddy_block_offset(block) - lhs_size;
> +			err =  __drm_buddy_alloc_range(mm, lhs_offset, lhs_size,
> +						       NULL, &blocks_lhs);
> +			if (!err) {
> +				list_splice(&blocks_lhs, blocks);
> +				return 0;
> +			} else if (err != -ENOSPC) {
> +				drm_buddy_free_list_internal(mm, blocks);
> +				return err;
> +			}
> +			/* Free blocks for the next iteration */
>   			drm_buddy_free_list_internal(mm, blocks);
> -			return err;
> -		}
> -		/* Free blocks for the next iteration */
> -		drm_buddy_free_list_internal(mm, blocks);
>   
> -		iter = rb_prev(iter);
> +			iter = rb_prev(iter);
> +		}
>   	}
>   
>   	return -ENOSPC;
> @@ -1239,6 +1274,7 @@ EXPORT_SYMBOL(drm_buddy_block_print);
>    */
>   void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p)
>   {
> +	enum drm_buddy_free_tree tree;
>   	int order;
>   
>   	drm_printf(p, "chunk_size: %lluKiB, total: %lluMiB, free: %lluMiB, clear_free: %lluMiB\n",
> @@ -1246,11 +1282,16 @@ void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p)
>   
>   	for (order = mm->max_order; order >= 0; order--) {
>   		struct drm_buddy_block *block, *tmp;
> +		struct rb_root *root;
>   		u64 count = 0, free;
>   
> -		rbtree_postorder_for_each_entry_safe(block, tmp, &mm->free_tree[order], rb) {
> -			BUG_ON(!drm_buddy_block_is_free(block));
> -			count++;
> +		for_each_free_tree(tree) {
> +			root = &mm->free_trees[tree][order];
> +
> +			rbtree_postorder_for_each_entry_safe(block, tmp, root, rb) {
> +				BUG_ON(!drm_buddy_block_is_free(block));
> +				count++;
> +			}
>   		}
>   
>   		drm_printf(p, "order-%2d ", order);
> diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
> index 9ee105d4309f..d7891d08f67a 100644
> --- a/include/drm/drm_buddy.h
> +++ b/include/drm/drm_buddy.h
> @@ -73,7 +73,7 @@ struct drm_buddy_block {
>    */
>   struct drm_buddy {
>   	/* Maintain a free list for each order. */
> -	struct rb_root *free_tree;
> +	struct rb_root **free_trees;
>   
>   	/*
>   	 * Maintain explicit binary tree(s) to track the allocation of the


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v7 3/3] drm/buddy: Add KUnit tests for allocator performance under fragmentation
  2025-09-26 11:00   ` Matthew Auld
@ 2025-10-01  3:59     ` Arunpravin Paneer Selvam
  0 siblings, 0 replies; 11+ messages in thread
From: Arunpravin Paneer Selvam @ 2025-10-01  3:59 UTC (permalink / raw)
  To: Matthew Auld, christian.koenig, dri-devel, amd-gfx, intel-gfx,
	intel-xe
  Cc: alexander.deucher, jani.nikula, peterz, samuel.pitoiset



On 9/26/2025 4:30 PM, Matthew Auld wrote:
> On 23/09/2025 10:02, Arunpravin Paneer Selvam wrote:
>> Add KUnit test cases that create severe memory fragmentation and
>> measure allocation/free performance.
>>
>> The tests simulate two scenarios -
>>
>> 1. Allocation under severe fragmentation
>>     - Allocate the entire 4 GiB space as 8 KiB blocks with 64 KiB 
>> alignment,
>>       split them into two groups and free with mixed flags to block 
>> coalescing.
>>     - Repeatedly allocate and free 64 KiB blocks while timing the loop.
>>     - Freelist runtime: 76475 ms(76.5 seconds), soft-lockup triggered.
>>       RB-tree runtime: 186 ms.
>>
>> 2. Reverse free order under fragmentation
>>     - Create a similarly fragmented space, free half the blocks, reverse
>>       the order of the remainder, and release them with the cleared 
>> flag.
>>     - Freelist runtime: 85620 ms(85.6 seconds).
>>       RB-tree runtime: 114 ms.
>>
>> Signed-off-by: Arunpravin Paneer Selvam 
>> <Arunpravin.PaneerSelvam@amd.com>
>> ---
>>   drivers/gpu/drm/tests/drm_buddy_test.c | 110 +++++++++++++++++++++++++
>>   1 file changed, 110 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c 
>> b/drivers/gpu/drm/tests/drm_buddy_test.c
>> index 7a0e523651f0..19b49fb6ec19 100644
>> --- a/drivers/gpu/drm/tests/drm_buddy_test.c
>> +++ b/drivers/gpu/drm/tests/drm_buddy_test.c
>> @@ -21,6 +21,115 @@ static inline u64 get_size(int order, u64 
>> chunk_size)
>>       return (1 << order) * chunk_size;
>>   }
>>   +static void drm_test_buddy_fragmentation_performance(struct kunit 
>> *test)
>> +{
>> +    const unsigned long max_acceptable_time_ms = 1000;
>> +    struct drm_buddy_block *block, *tmp;
>> +    int num_blocks, i, ret, count = 0;
>> +    LIST_HEAD(allocated_blocks);
>> +    unsigned long elapsed_ms;
>> +    LIST_HEAD(reverse_list);
>> +    LIST_HEAD(test_blocks);
>> +    LIST_HEAD(clear_list);
>> +    LIST_HEAD(dirty_list);
>> +    LIST_HEAD(free_list);
>> +    struct drm_buddy mm;
>> +    u64 mm_size = SZ_4G;
>> +    ktime_t start, end;
>> +
>> +    /*
>> +     * Allocation under severe fragmentation
>> +     *
>> +     * Create severe fragmentation by allocating the entire 4 GiB 
>> address space
>> +     * as tiny 8 KiB blocks but forcing a 64 KiB alignment. The 
>> resulting pattern
>> +     * leaves many scattered holes. Split the allocations into two 
>> groups and
>> +     * return them with different flags to block coalescing, then 
>> repeatedly
>> +     * allocate and free 64 KiB blocks while timing the loop. This 
>> stresses how
>> +     * quickly the allocator can satisfy larger, aligned requests 
>> from a pool of
>> +     * highly fragmented space.
>> +     */
>> +    KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, SZ_4K),
>> +                   "buddy_init failed\n");
>> +
>> +    num_blocks = mm_size / SZ_64K;
>> +
>> +    start = ktime_get();
>> +    /* Allocate with maximum fragmentation - 8K blocks with 64K 
>> alignment */
>> +    for (i = 0; i < num_blocks; i++)
>> +        KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, 
>> mm_size, SZ_8K, SZ_64K,
>> +                                    &allocated_blocks, 0),
>> +                    "buddy_alloc hit an error size=%u\n", SZ_8K);
>> +
>> +    list_for_each_entry_safe(block, tmp, &allocated_blocks, link) {
>> +        if (count % 4 == 0 || count % 4 == 3)
>> +            list_move_tail(&block->link, &clear_list);
>> +        else
>> +            list_move_tail(&block->link, &dirty_list);
>> +        count++;
>> +    }
>> +
>> +    /* Free with different flags to ensure no coalescing */
>> +    drm_buddy_free_list(&mm, &clear_list, DRM_BUDDY_CLEARED);
>> +    drm_buddy_free_list(&mm, &dirty_list, 0);
>> +
>> +    for (i = 0; i < num_blocks; i++)
>> +        KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, 
>> mm_size, SZ_64K, SZ_64K,
>> +                                    &test_blocks, 0),
>> +                    "buddy_alloc hit an error size=%u\n", SZ_64K);
>> +    drm_buddy_free_list(&mm, &test_blocks, 0);
>> +
>> +    end = ktime_get();
>> +    elapsed_ms = ktime_to_ms(ktime_sub(end, start));
>> +    /* Performance validation */
>> +    KUNIT_EXPECT_LT_MSG(test, elapsed_ms, max_acceptable_time_ms,
>> +                "Fragmented allocation took %lu ms (max acceptable: 
>> %lu ms)",
>> +                elapsed_ms, max_acceptable_time_ms);
>> +    drm_buddy_fini(&mm);
>> +
>> +    /*
>> +     * Reverse free order under fragmentation
>> +     *
>> +     * Construct a fragmented 4 GiB space by allocating every 8 KiB 
>> block with
>> +     * 64 KiB alignment, creating a dense scatter of small regions. 
>> Half of the
>> +     * blocks are selectively freed to form sparse gaps, while the 
>> remaining
>> +     * allocations are preserved, reordered in reverse, and released 
>> back with
>> +     * the cleared flag. This models a pathological reverse-ordered 
>> free pattern
>> +     * and measures how quickly the allocator can merge and reclaim 
>> space when
>> +     * deallocation occurs in the opposite order of allocation, 
>> exposing the
>> +     * cost difference between a linear freelist scan and an ordered 
>> tree lookup.
>> +     */
>> +    ret = drm_buddy_init(&mm, mm_size, SZ_4K);
>> +    KUNIT_ASSERT_EQ(test, ret, 0);
>> +
>> +    start = ktime_get();
>> +    /* Allocate maximum fragmentation */
>> +    for (i = 0; i < num_blocks; i++)
>> +        KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, 
>> mm_size, SZ_8K, SZ_64K,
>> +                                    &allocated_blocks, 0),
>> +                    "buddy_alloc hit an error size=%u\n", SZ_8K);
>> +
>> +    list_for_each_entry_safe(block, tmp, &allocated_blocks, link) {
>> +        if (count % 2 == 0)
>> +            list_move_tail(&block->link, &free_list);
>> +        count++;
>> +    }
>> +    drm_buddy_free_list(&mm, &free_list, DRM_BUDDY_CLEARED);
>> +
>> +    list_for_each_entry_safe_reverse(block, tmp, &allocated_blocks, 
>> link)
>> +        list_move(&block->link, &reverse_list);
>> +    drm_buddy_free_list(&mm, &reverse_list, DRM_BUDDY_CLEARED);
>> +
>> +    end = ktime_get();
>> +    elapsed_ms = ktime_to_ms(ktime_sub(end, start));
>> +
>> +    /* Performance validation */
>> +    KUNIT_EXPECT_LT_MSG(test, elapsed_ms, max_acceptable_time_ms,
>> +                "Reverse-ordered free took %lu ms (max acceptable: 
>> %lu ms)",
>> +                elapsed_ms, max_acceptable_time_ms);
>
> Sorry for the delay. We are pretty sure these time asserts are not 
> going to be flaky over many thousands of runs across different types 
> of machines (maybe some underpowered atom)?
Yes, correct. I have updated the performance test to avoid hard-coded
timing thresholds. The test now measures and reports execution time
instead of enforcing a 1000 ms limit, since run times vary across
machines. This keeps the test portable and stable, while still exposing
the performance data for regression tracking.

Regards,
Arun.
>
> Assuming not a concern,
> Reviewed-by: Matthew Auld <matthew.auld@intel.com>
>
>> +
>> +    drm_buddy_fini(&mm);
>> +}
>> +
>>   static void drm_test_buddy_alloc_range_bias(struct kunit *test)
>>   {
>>       u32 mm_size, size, ps, bias_size, bias_start, bias_end, bias_rem;
>> @@ -772,6 +881,7 @@ static struct kunit_case drm_buddy_tests[] = {
>>       KUNIT_CASE(drm_test_buddy_alloc_contiguous),
>>       KUNIT_CASE(drm_test_buddy_alloc_clear),
>>       KUNIT_CASE(drm_test_buddy_alloc_range_bias),
>> +    KUNIT_CASE(drm_test_buddy_fragmentation_performance),
>>       {}
>>   };
>


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v7 2/3] drm/buddy: Separate clear and dirty free block trees
  2025-09-26 17:08   ` Matthew Auld
@ 2025-10-04  8:30     ` Arunpravin Paneer Selvam
  0 siblings, 0 replies; 11+ messages in thread
From: Arunpravin Paneer Selvam @ 2025-10-04  8:30 UTC (permalink / raw)
  To: Matthew Auld, christian.koenig, dri-devel, amd-gfx, intel-gfx,
	intel-xe
  Cc: alexander.deucher, jani.nikula, peterz, samuel.pitoiset, stable

Hi Matthew,

On 9/26/2025 10:38 PM, Matthew Auld wrote:
> On 23/09/2025 10:02, Arunpravin Paneer Selvam wrote:
>> Maintain two separate RB trees per order - one for clear (zeroed) blocks
>> and another for dirty (uncleared) blocks. This separation improves
>> code clarity and makes it more obvious which tree is being searched
>> during allocation. It also improves scalability and efficiency when
>> searching for a specific type of block, avoiding unnecessary checks
>> and making the allocator more predictable under fragmentation.
>>
>> The changes have been validated using the existing drm_buddy_test
>> KUnit test cases, along with selected graphics workloads,
>> to ensure correctness and avoid regressions.
>>
>> v2: Missed adding the suggested-by tag. Added it in v2.
>>
>> v3(Matthew):
>>    - Remove the double underscores from the internal functions.
>>    - Rename the internal functions to have less generic names.
>>    - Fix the error handling code.
>>    - Pass tree argument for the tree macro.
>>    - Use the existing dirty/free bit instead of new tree field.
>>    - Make free_trees[] instead of clear_tree and dirty_tree for
>>      more cleaner approach.
>>
>> v4:
>>    - A bug was reported by Intel CI and it is fixed by
>>      Matthew Auld.
>>    - Replace the get_root function with
>>      &mm->free_trees[tree][order] (Matthew)
>>    - Remove the unnecessary rbtree_is_empty() check (Matthew)
>>    - Remove the unnecessary get_tree_for_flags() function.
>>    - Rename get_tree_for_block() name with get_block_tree() for more
>>      clarity.
>>
>> v5(Jani Nikula):
>>    - Don't use static inline in .c files.
>>    - enum free_tree and enumerator names are quite generic for a header
>>      and usage and the whole enum should be an implementation detail.
>>
>> v6:
>>    - Rewrite the __force_merge() function using the rb_last() and 
>> rb_prev().
>>
>> Cc: stable@vger.kernel.org
>> Fixes: a68c7eaa7a8f ("drm/amdgpu: Enable clear page functionality")
>> Signed-off-by: Arunpravin Paneer Selvam 
>> <Arunpravin.PaneerSelvam@amd.com>
>> Suggested-by: Matthew Auld <matthew.auld@intel.com>
>> Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4260
>> ---
>>   drivers/gpu/drm/drm_buddy.c | 321 ++++++++++++++++++++----------------
>>   include/drm/drm_buddy.h     |   2 +-
>>   2 files changed, 182 insertions(+), 141 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
>> index 67aa67229cc3..6e12a0b5d5e3 100644
>> --- a/drivers/gpu/drm/drm_buddy.c
>> +++ b/drivers/gpu/drm/drm_buddy.c
>> @@ -12,9 +12,16 @@
>>     #include <drm/drm_buddy.h>
>>   +enum drm_buddy_free_tree {
>> +    DRM_BUDDY_CLEAR_TREE = 0,
>> +    DRM_BUDDY_DIRTY_TREE,
>> +    DRM_BUDDY_MAX_FREE_TREES,
>> +};
>> +
>>   static struct kmem_cache *slab_blocks;
>>   -#define rbtree_get_free_block(node) rb_entry((node), struct 
>> drm_buddy_block, rb)
>> +#define for_each_free_tree(tree) \
>> +    for ((tree) = 0; (tree) < DRM_BUDDY_MAX_FREE_TREES; (tree)++)
>
> Some places seem to open code this? Maybe just drop this or use it 
> everywhere?
I replaced all open-coded iterations with the for_each_free_tree(tree) macro.
>
>>     static struct drm_buddy_block *drm_block_alloc(struct drm_buddy *mm,
>>                              struct drm_buddy_block *parent,
>> @@ -45,6 +52,30 @@ static void drm_block_free(struct drm_buddy *mm,
>>       kmem_cache_free(slab_blocks, block);
>>   }
>>   +static enum drm_buddy_free_tree
>> +get_block_tree(struct drm_buddy_block *block)
>> +{
>> +    return drm_buddy_block_is_clear(block) ?
>> +           DRM_BUDDY_CLEAR_TREE : DRM_BUDDY_DIRTY_TREE;
>> +}
>> +
>> +static struct drm_buddy_block *
>> +rbtree_get_free_block(const struct rb_node *node)
>> +{
>> +    return node ? rb_entry(node, struct drm_buddy_block, rb) : NULL;
>> +}
>> +
>> +static struct drm_buddy_block *
>> +rbtree_last_free_block(struct rb_root *root)
>> +{
>> +    return rbtree_get_free_block(rb_last(root));
>> +}
>> +
>> +static bool rbtree_is_empty(struct rb_root *root)
>> +{
>> +    return RB_EMPTY_ROOT(root);
>> +}
>> +
>>   static bool drm_buddy_block_offset_less(const struct 
>> drm_buddy_block *block,
>>                       const struct drm_buddy_block *node)
>>   {
>> @@ -59,37 +90,28 @@ static bool rbtree_block_offset_less(struct 
>> rb_node *block,
>>   }
>>     static void rbtree_insert(struct drm_buddy *mm,
>> -              struct drm_buddy_block *block)
>> +              struct drm_buddy_block *block,
>> +              enum drm_buddy_free_tree tree)
>>   {
>>       rb_add(&block->rb,
>> -           &mm->free_tree[drm_buddy_block_order(block)],
>> + &mm->free_trees[tree][drm_buddy_block_order(block)],
>>              rbtree_block_offset_less);
>>   }
>>     static void rbtree_remove(struct drm_buddy *mm,
>>                 struct drm_buddy_block *block)
>>   {
>> +    unsigned int order = drm_buddy_block_order(block);
>>       struct rb_root *root;
>> +    enum drm_buddy_free_tree tree;
>>   -    root = &mm->free_tree[drm_buddy_block_order(block)];
>> -    rb_erase(&block->rb, root);
>> +    tree = get_block_tree(block);
>> +    root = &mm->free_trees[tree][order];
>>   +    rb_erase(&block->rb, root);
>>       RB_CLEAR_NODE(&block->rb);
>>   }
>>   -static struct drm_buddy_block *
>> -rbtree_last_entry(struct drm_buddy *mm, unsigned int order)
>> -{
>> -    struct rb_node *node = rb_last(&mm->free_tree[order]);
>> -
>> -    return node ? rb_entry(node, struct drm_buddy_block, rb) : NULL;
>> -}
>> -
>> -static bool rbtree_is_empty(struct drm_buddy *mm, unsigned int order)
>> -{
>> -    return RB_EMPTY_ROOT(&mm->free_tree[order]);
>> -}
>> -
>>   static void clear_reset(struct drm_buddy_block *block)
>>   {
>>       block->header &= ~DRM_BUDDY_HEADER_CLEAR;
>> @@ -112,10 +134,13 @@ static void mark_allocated(struct drm_buddy *mm,
>>   static void mark_free(struct drm_buddy *mm,
>>                 struct drm_buddy_block *block)
>>   {
>> +    enum drm_buddy_free_tree tree;
>> +
>>       block->header &= ~DRM_BUDDY_HEADER_STATE;
>>       block->header |= DRM_BUDDY_FREE;
>>   -    rbtree_insert(mm, block);
>> +    tree = get_block_tree(block);
>> +    rbtree_insert(mm, block, tree);
>>   }
>>     static void mark_split(struct drm_buddy *mm,
>> @@ -201,6 +226,7 @@ static int __force_merge(struct drm_buddy *mm,
>>                u64 end,
>>                unsigned int min_order)
>>   {
>> +    enum drm_buddy_free_tree tree;
>>       unsigned int order;
>>       int i;
>>   @@ -210,45 +236,48 @@ static int __force_merge(struct drm_buddy *mm,
>>       if (min_order > mm->max_order)
>>           return -EINVAL;
>>   -    for (i = min_order - 1; i >= 0; i--) {
>> -        struct rb_root *root = &mm->free_tree[i];
>> -        struct rb_node *iter;
>> +    for_each_free_tree(tree) {
>> +        for (i = min_order - 1; i >= 0; i--) {
>> +            struct rb_node *iter = rb_last(&mm->free_trees[tree][i]);
>>   -        iter = rb_last(root);
>> -
>> -        while (iter) {
>> -            struct drm_buddy_block *block, *buddy;
>> -            u64 block_start, block_end;
>> +            while (iter) {
>> +                struct drm_buddy_block *block, *buddy;
>> +                u64 block_start, block_end;
>>   -            block = rbtree_get_free_block(iter);
>> -            iter = rb_prev(iter);
>> +                block = rbtree_get_free_block(iter);
>> +                iter = rb_prev(iter);
>>   -            if (!block || !block->parent)
>> -                continue;
>> +                if (!block || !block->parent)
>> +                    continue;
>>   -            block_start = drm_buddy_block_offset(block);
>> -            block_end = block_start + drm_buddy_block_size(mm, 
>> block) - 1;
>> +                block_start = drm_buddy_block_offset(block);
>> +                block_end = block_start + drm_buddy_block_size(mm, 
>> block) - 1;
>>   -            if (!contains(start, end, block_start, block_end))
>> -                continue;
>> +                if (!contains(start, end, block_start, block_end))
>> +                    continue;
>>   -            buddy = __get_buddy(block);
>> -            if (!drm_buddy_block_is_free(buddy))
>> -                continue;
>> +                buddy = __get_buddy(block);
>> +                if (!drm_buddy_block_is_free(buddy))
>> +                    continue;
>>   -            WARN_ON(drm_buddy_block_is_clear(block) ==
>> -                drm_buddy_block_is_clear(buddy));
>> +                WARN_ON(drm_buddy_block_is_clear(block) ==
>> +                    drm_buddy_block_is_clear(buddy));
>>   -            if (iter == &buddy->rb)
>> -                iter = rb_prev(iter);
>> +                /*
>> +                 * Advance to the next node when the current node is 
>> the buddy,
>> +                 * as freeing the block will also remove its buddy 
>> from the tree.
>> +                 */
>> +                if (iter == &buddy->rb)
>> +                    iter = rb_prev(iter);
>>   -            rbtree_remove(mm, block);
>> -            if (drm_buddy_block_is_clear(block))
>> -                mm->clear_avail -= drm_buddy_block_size(mm, block);
>> +                rbtree_remove(mm, block);
>> +                if (drm_buddy_block_is_clear(block))
>> +                    mm->clear_avail -= drm_buddy_block_size(mm, block);
>>   -            order = __drm_buddy_free(mm, block, true);
>> -            if (order >= min_order)
>> -                return 0;
>> +                order = __drm_buddy_free(mm, block, true);
>> +                if (order >= min_order)
>> +                    return 0;
>> +            }
>
> Do we need all these changes in __force_merge? Can't we just always 
> pick the dirty tree and keep everything else the same? If something is 
> non-merged there must be a dirty block preventing that, and when force 
> merging everything unmerged will be re-labled as dirty anyway, so the 
> second loop through the clean tree should always yield nothing?
When running the new KUnit performance test (Allocation under severe 
fragmentation), the test first allocates blocks of size 8 KiB with 64 
KiB alignment. As a result, the allocator initially allocates 64 KiB and 
then trims it down to 8 KiB. The allocations are then split into two 
groups and freed with different flags to prevent block coalescing. This 
creates a highly fragmented memory space with many scattered holes.

After this setup, the test attempts to allocate new 64 KiB blocks, which 
triggers repeated calls to force_merge() as it tries to merge smaller, 
lower-order blocks to form the required 64 KiB regions.
From my testing, using only the dirty tree takes about 57 seconds,
whereas iterating over the clear tree first takes only about 186 ms.

__force_merge
tree = 1 --- count = 30987, order = 3
tree = 1 --- count = 30987, order = 2
tree = 1 --- count = 30987, order = 1

__force_merge
tree = 0 --count=0, order = 3
tree = 0 --count=0, order = 2
tree = 0 --count=30334, order = 1

When only the dirty tree is used, the search unnecessarily iterates
over the order-3 and order-2 entries before reaching order-1, causing a
significant delay (here the order-3 and order-2 blocks cannot be merged
with their buddies, since the buddies are split and not free). In
contrast, iterating the clear tree first allows the search to skip
directly to order-1 (since orders 3 and 2 are empty there), completing
much faster.

Initially, AFAIK, the dirty tree tends to have more blocks in the higher 
orders. Keeping the clear tree first helps avoid these unnecessary 
iterations. Moreover, prioritizing the clear tree does not cause any 
functional difference - when a cleared block merges with a dirty block, 
the buddy is removed from the dirty tree as well. Therefore, iterating 
over the clear tree is harmless and, in certain cases, significantly 
improves performance compared to starting with the dirty tree.

This difference arises because allocations always come from the dirty
tree. The allocator trims each block to the requested size and reinserts
the remaining fragments back into the dirty tree, causing it to
accumulate far more blocks than the clear tree. Please let me know
your thoughts.

>
>>           }
>>       }
>>   @@ -269,7 +298,7 @@ static int __force_merge(struct drm_buddy *mm,
>>    */
>>   int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
>>   {
>> -    unsigned int i;
>> +    unsigned int i, j;
>>       u64 offset;
>>         if (size < chunk_size)
>> @@ -291,14 +320,22 @@ int drm_buddy_init(struct drm_buddy *mm, u64 
>> size, u64 chunk_size)
>>         BUG_ON(mm->max_order > DRM_BUDDY_MAX_ORDER);
>>   -    mm->free_tree = kmalloc_array(mm->max_order + 1,
>> -                      sizeof(struct rb_root),
>> -                      GFP_KERNEL);
>> -    if (!mm->free_tree)
>> +    mm->free_trees = kmalloc_array(DRM_BUDDY_MAX_FREE_TREES,
>> +                       sizeof(*mm->free_trees),
>> +                       GFP_KERNEL);
>> +    if (!mm->free_trees)
>>           return -ENOMEM;
>>   -    for (i = 0; i <= mm->max_order; ++i)
>> -        mm->free_tree[i] = RB_ROOT;
>> +    for (i = 0; i < DRM_BUDDY_MAX_FREE_TREES; i++) {
>> +        mm->free_trees[i] = kmalloc_array(mm->max_order + 1,
>> +                          sizeof(struct rb_root),
>> +                          GFP_KERNEL);
>> +        if (!mm->free_trees[i])
>> +            goto out_free_tree;
>> +
>> +        for (j = 0; j <= mm->max_order; ++j)
>> +            mm->free_trees[i][j] = RB_ROOT;
>> +    }
>>         mm->n_roots = hweight64(size);
>>   @@ -346,7 +383,9 @@ int drm_buddy_init(struct drm_buddy *mm, u64 
>> size, u64 chunk_size)
>>           drm_block_free(mm, mm->roots[i]);
>>       kfree(mm->roots);
>>   out_free_tree:
>> -    kfree(mm->free_tree);
>> +    while (i--)
>> +        kfree(mm->free_trees[i]);
>
> out_free_roots is also decrementing 'i' here it seems, so looks like 
> this might blow up?
Good catch, thanks. I have fixed the problem.
>
>> +    kfree(mm->free_trees);
>>       return -ENOMEM;
>>   }
>>   EXPORT_SYMBOL(drm_buddy_init);
>> @@ -382,8 +421,9 @@ void drm_buddy_fini(struct drm_buddy *mm)
>>         WARN_ON(mm->avail != mm->size);
>>   +    for (i = 0; i < DRM_BUDDY_MAX_FREE_TREES; i++)
>> +        kfree(mm->free_trees[i]);
>>       kfree(mm->roots);
>> -    kfree(mm->free_tree);
>>   }
>>   EXPORT_SYMBOL(drm_buddy_fini);
>>   @@ -407,8 +447,7 @@ static int split_block(struct drm_buddy *mm,
>>           return -ENOMEM;
>>       }
>>   -    mark_free(mm, block->left);
>> -    mark_free(mm, block->right);
>> +    mark_split(mm, block);
>>         if (drm_buddy_block_is_clear(block)) {
>>           mark_cleared(block->left);
>> @@ -416,7 +455,8 @@ static int split_block(struct drm_buddy *mm,
>>           clear_reset(block);
>>       }
>>   -    mark_split(mm, block);
>> +    mark_free(mm, block->left);
>> +    mark_free(mm, block->right);
>>         return 0;
>>   }
>> @@ -449,6 +489,7 @@ EXPORT_SYMBOL(drm_get_buddy);
>>    */
>>   void drm_buddy_reset_clear(struct drm_buddy *mm, bool is_clear)
>>   {
>> +    enum drm_buddy_free_tree src_tree, dst_tree;
>>       u64 root_size, size, start;
>>       unsigned int order;
>>       int i;
>> @@ -463,19 +504,24 @@ void drm_buddy_reset_clear(struct drm_buddy 
>> *mm, bool is_clear)
>>           size -= root_size;
>>       }
>>   +    src_tree = is_clear ? DRM_BUDDY_DIRTY_TREE : 
>> DRM_BUDDY_CLEAR_TREE;
>> +    dst_tree = is_clear ? DRM_BUDDY_CLEAR_TREE : DRM_BUDDY_DIRTY_TREE;
>> +
>>       for (i = 0; i <= mm->max_order; ++i) {
>> +        struct rb_root *root = &mm->free_trees[src_tree][i];
>>           struct drm_buddy_block *block, *tmp;
>>   -        rbtree_postorder_for_each_entry_safe(block, tmp, 
>> &mm->free_tree[i], rb) {
>> -            if (is_clear != drm_buddy_block_is_clear(block)) {
>> -                if (is_clear) {
>> -                    mark_cleared(block);
>> -                    mm->clear_avail += drm_buddy_block_size(mm, block);
>> -                } else {
>> -                    clear_reset(block);
>> -                    mm->clear_avail -= drm_buddy_block_size(mm, block);
>> -                }
>> +        rbtree_postorder_for_each_entry_safe(block, tmp, root, rb) {
>> +            rbtree_remove(mm, block);
>> +            if (is_clear) {
>> +                mark_cleared(block);
>> +                mm->clear_avail += drm_buddy_block_size(mm, block);
>> +            } else {
>> +                clear_reset(block);
>> +                mm->clear_avail -= drm_buddy_block_size(mm, block);
>>               }
>> +
>> +            rbtree_insert(mm, block, dst_tree);
>>           }
>>       }
>>   }
>> @@ -665,27 +711,17 @@ __drm_buddy_alloc_range_bias(struct drm_buddy *mm,
>>   }
>>     static struct drm_buddy_block *
>> -get_maxblock(struct drm_buddy *mm, unsigned int order,
>> -         unsigned long flags)
>> +get_maxblock(struct drm_buddy *mm,
>> +         unsigned int order,
>> +         enum drm_buddy_free_tree tree)
>>   {
>>       struct drm_buddy_block *max_block = NULL, *block = NULL;
>> +    struct rb_root *root;
>>       unsigned int i;
>>         for (i = order; i <= mm->max_order; ++i) {
>> -        struct rb_node *iter = rb_last(&mm->free_tree[i]);
>> -        struct drm_buddy_block *tmp_block;
>> -
>> -        while (iter) {
>> -            tmp_block = rbtree_get_free_block(iter);
>> -
>> -            if (!block_incompatible(tmp_block, flags)) {
>> -                block = tmp_block;
>> -                break;
>> -            }
>> -
>> -            iter = rb_prev(iter);
>> -        }
>> -
>> +        root = &mm->free_trees[tree][i];
>> +        block = rbtree_last_free_block(root);
>>           if (!block)
>>               continue;
>>   @@ -709,43 +745,39 @@ alloc_from_freetree(struct drm_buddy *mm,
>>               unsigned long flags)
>>   {
>>       struct drm_buddy_block *block = NULL;
>> +    struct rb_root *root;
>> +    enum drm_buddy_free_tree tree;
>>       unsigned int tmp;
>>       int err;
>>   +    tree = (flags & DRM_BUDDY_CLEAR_ALLOCATION) ?
>> +        DRM_BUDDY_CLEAR_TREE : DRM_BUDDY_DIRTY_TREE;
>> +
>>       if (flags & DRM_BUDDY_TOPDOWN_ALLOCATION) {
>> -        block = get_maxblock(mm, order, flags);
>> +        block = get_maxblock(mm, order, tree);
>>           if (block)
>>               /* Store the obtained block order */
>>               tmp = drm_buddy_block_order(block);
>>       } else {
>>           for (tmp = order; tmp <= mm->max_order; ++tmp) {
>> -            struct rb_node *iter = rb_last(&mm->free_tree[tmp]);
>> -            struct drm_buddy_block *tmp_block;
>> -
>> -            while (iter) {
>> -                tmp_block = rbtree_get_free_block(iter);
>> -
>> -                if (!block_incompatible(tmp_block, flags)) {
>> -                    block = tmp_block;
>> -                    break;
>> -                }
>> -
>> -                iter = rb_prev(iter);
>> -            }
>> -
>> +            /* Get RB tree root for this order and tree */
>> +            root = &mm->free_trees[tree][tmp];
>> +            block = rbtree_last_free_block(root);
>>               if (block)
>>                   break;
>>           }
>>       }
>>         if (!block) {
>> -        /* Fallback method */
>> +        /* Try allocating from the other tree */
>> +        tree = (tree == DRM_BUDDY_CLEAR_TREE) ?
>> +            DRM_BUDDY_DIRTY_TREE : DRM_BUDDY_CLEAR_TREE;
>> +
>>           for (tmp = order; tmp <= mm->max_order; ++tmp) {
>> -            if (!rbtree_is_empty(mm, tmp)) {
>
> Did you mean to drop the is_empty() check here? If it's overkill I 
> think better just not add it in the previous patch?
Modified in v8.
>
>> -                block = rbtree_last_entry(mm, tmp);
>> -                if (block)
>> -                    break;
>> -            }
>> +            root = &mm->free_trees[tree][tmp];
>> +            block = rbtree_last_free_block(root);
>> +            if (block)
>> +                break;
>>           }
>>             if (!block)
>> @@ -887,9 +919,9 @@ static int __alloc_contig_try_harder(struct 
>> drm_buddy *mm,
>>                        struct list_head *blocks)
>>   {
>>       u64 rhs_offset, lhs_offset, lhs_size, filled;
>> +    enum drm_buddy_free_tree tree;
>>       struct drm_buddy_block *block;
>>       LIST_HEAD(blocks_lhs);
>> -    struct rb_node *iter;
>>       unsigned long pages;
>>       unsigned int order;
>>       u64 modify_size;
>> @@ -901,40 +933,43 @@ static int __alloc_contig_try_harder(struct 
>> drm_buddy *mm,
>>       if (order == 0)
>>           return -ENOSPC;
>>   -    if (rbtree_is_empty(mm, order))
>> +    if 
>> (rbtree_is_empty(&mm->free_trees[DRM_BUDDY_CLEAR_TREE][order]) &&
>> + rbtree_is_empty(&mm->free_trees[DRM_BUDDY_DIRTY_TREE][order]))
>
> Could potentially merge this with the for_each_tree loop below?
Modified in v8.

Regards,
Arun.
>
> Otherwise,
> Reviewed-by: Matthew Auld <matthew.auld@intel.com>
>
>>           return -ENOSPC;
>>   -    iter = rb_last(&mm->free_tree[order]);
>> -
>> -    while (iter) {
>> -        block = rbtree_get_free_block(iter);
>> -
>> -        /* Allocate blocks traversing RHS */
>> -        rhs_offset = drm_buddy_block_offset(block);
>> -        err =  __drm_buddy_alloc_range(mm, rhs_offset, size,
>> -                           &filled, blocks);
>> -        if (!err || err != -ENOSPC)
>> -            return err;
>> -
>> -        lhs_size = max((size - filled), min_block_size);
>> -        if (!IS_ALIGNED(lhs_size, min_block_size))
>> -            lhs_size = round_up(lhs_size, min_block_size);
>> -
>> -        /* Allocate blocks traversing LHS */
>> -        lhs_offset = drm_buddy_block_offset(block) - lhs_size;
>> -        err =  __drm_buddy_alloc_range(mm, lhs_offset, lhs_size,
>> -                           NULL, &blocks_lhs);
>> -        if (!err) {
>> -            list_splice(&blocks_lhs, blocks);
>> -            return 0;
>> -        } else if (err != -ENOSPC) {
>> +    for_each_free_tree(tree) {
>> +        struct rb_node *iter = rb_last(&mm->free_trees[tree][order]);
>> +
>> +        while (iter) {
>> +            block = rbtree_get_free_block(iter);
>> +
>> +            /* Allocate blocks traversing RHS */
>> +            rhs_offset = drm_buddy_block_offset(block);
>> +            err =  __drm_buddy_alloc_range(mm, rhs_offset, size,
>> +                               &filled, blocks);
>> +            if (!err || err != -ENOSPC)
>> +                return err;
>> +
>> +            lhs_size = max((size - filled), min_block_size);
>> +            if (!IS_ALIGNED(lhs_size, min_block_size))
>> +                lhs_size = round_up(lhs_size, min_block_size);
>> +
>> +            /* Allocate blocks traversing LHS */
>> +            lhs_offset = drm_buddy_block_offset(block) - lhs_size;
>> +            err =  __drm_buddy_alloc_range(mm, lhs_offset, lhs_size,
>> +                               NULL, &blocks_lhs);
>> +            if (!err) {
>> +                list_splice(&blocks_lhs, blocks);
>> +                return 0;
>> +            } else if (err != -ENOSPC) {
>> +                drm_buddy_free_list_internal(mm, blocks);
>> +                return err;
>> +            }
>> +            /* Free blocks for the next iteration */
>>               drm_buddy_free_list_internal(mm, blocks);
>> -            return err;
>> -        }
>> -        /* Free blocks for the next iteration */
>> -        drm_buddy_free_list_internal(mm, blocks);
>>   -        iter = rb_prev(iter);
>> +            iter = rb_prev(iter);
>> +        }
>>       }
>>         return -ENOSPC;
>> @@ -1239,6 +1274,7 @@ EXPORT_SYMBOL(drm_buddy_block_print);
>>    */
>>   void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p)
>>   {
>> +    enum drm_buddy_free_tree tree;
>>       int order;
>>         drm_printf(p, "chunk_size: %lluKiB, total: %lluMiB, free: 
>> %lluMiB, clear_free: %lluMiB\n",
>> @@ -1246,11 +1282,16 @@ void drm_buddy_print(struct drm_buddy *mm, 
>> struct drm_printer *p)
>>         for (order = mm->max_order; order >= 0; order--) {
>>           struct drm_buddy_block *block, *tmp;
>> +        struct rb_root *root;
>>           u64 count = 0, free;
>>   -        rbtree_postorder_for_each_entry_safe(block, tmp, 
>> &mm->free_tree[order], rb) {
>> -            BUG_ON(!drm_buddy_block_is_free(block));
>> -            count++;
>> +        for_each_free_tree(tree) {
>> +            root = &mm->free_trees[tree][order];
>> +
>> +            rbtree_postorder_for_each_entry_safe(block, tmp, root, 
>> rb) {
>> +                BUG_ON(!drm_buddy_block_is_free(block));
>> +                count++;
>> +            }
>>           }
>>             drm_printf(p, "order-%2d ", order);
>> diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
>> index 9ee105d4309f..d7891d08f67a 100644
>> --- a/include/drm/drm_buddy.h
>> +++ b/include/drm/drm_buddy.h
>> @@ -73,7 +73,7 @@ struct drm_buddy_block {
>>    */
>>   struct drm_buddy {
>>       /* Maintain a free list for each order. */
>> -    struct rb_root *free_tree;
>> +    struct rb_root **free_trees;
>>         /*
>>        * Maintain explicit binary tree(s) to track the allocation of the
>


^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2025-10-04  8:31 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-09-23  9:02 [PATCH v7 1/3] drm/buddy: Optimize free block management with RB tree Arunpravin Paneer Selvam
2025-09-23  9:02 ` [PATCH v7 2/3] drm/buddy: Separate clear and dirty free block trees Arunpravin Paneer Selvam
2025-09-26 17:08   ` Matthew Auld
2025-10-04  8:30     ` Arunpravin Paneer Selvam
2025-09-23  9:02 ` [PATCH v7 3/3] drm/buddy: Add KUnit tests for allocator performance under fragmentation Arunpravin Paneer Selvam
2025-09-26 11:00   ` Matthew Auld
2025-10-01  3:59     ` Arunpravin Paneer Selvam
2025-09-23  9:52 ` ✓ i915.CI.BAT: success for series starting with [v7,1/3] drm/buddy: Optimize free block management with RB tree Patchwork
2025-09-23 10:58 ` ✗ i915.CI.Full: failure " Patchwork
2025-09-26  5:41 ` [PATCH v7 1/3] " Arunpravin Paneer Selvam
2025-09-26 16:05 ` Matthew Auld

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox