* [PATCH v3 1/2] drm/buddy: Optimize free block management with RB tree
@ 2025-08-21 15:06 Arunpravin Paneer Selvam
2025-08-21 15:06 ` [PATCH v3 2/2] drm/buddy: Separate clear and dirty free block trees Arunpravin Paneer Selvam
` (3 more replies)
0 siblings, 4 replies; 10+ messages in thread
From: Arunpravin Paneer Selvam @ 2025-08-21 15:06 UTC (permalink / raw)
To: christian.koenig, matthew.auld, dri-devel, amd-gfx, intel-gfx,
intel-xe
Cc: alexander.deucher, Arunpravin Paneer Selvam
Replace the freelist (O(n)) used for free block management with a
red-black tree, providing more efficient O(log n) search, insert,
and delete operations. This improves scalability and performance
when managing large numbers of free blocks per order (e.g., hundreds
or thousands).
In the VK-CTS memory stress subtest, the buddy manager merges
fragmented memory and inserts freed blocks into the freelist. Since
freelist insertion is O(n), this becomes a bottleneck as fragmentation
increases. Benchmarking shows list_insert_sorted() consumes ~52.69% CPU
with the freelist, compared to just 0.03% with the RB tree
(rbtree_insert.isra.0), despite performing the same sorted insert.
This also improves performance in heavily fragmented workloads,
such as games or graphics tests that stress memory.
v3(Matthew):
- Remove RB_EMPTY_NODE check in force_merge function.
- Rename rb for loop macros to have less generic names and move to
.c file.
- Make the rb node rb and link field as union.
Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
---
drivers/gpu/drm/drm_buddy.c | 175 +++++++++++++++++++++++++-----------
include/drm/drm_buddy.h | 9 +-
2 files changed, 130 insertions(+), 54 deletions(-)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index a94061f373de..92226a46cc2c 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -14,6 +14,41 @@
static struct kmem_cache *slab_blocks;
+/*
+ * for_each_rb_free_block() - iterate over an RB tree in order
+ * @pos: the struct type * to use as a loop cursor
+ * @root: pointer to struct rb_root to iterate
+ * @member: name of the rb_node field within the struct
+ */
+#define for_each_rb_free_block(pos, root, member) \
+ for (pos = rb_entry_safe(rb_first(root), typeof(*pos), member); \
+ pos; \
+ pos = rb_entry_safe(rb_next(&(pos)->member), typeof(*pos), member))
+
+/*
+ * for_each_rb_free_block_reverse() - iterate over an RB tree in reverse order
+ * @pos: the struct type * to use as a loop cursor
+ * @root: pointer to struct rb_root to iterate
+ * @member: name of the rb_node field within the struct
+ */
+#define for_each_rb_free_block_reverse(pos, root, member) \
+ for (pos = rb_entry_safe(rb_last(root), typeof(*pos), member); \
+ pos; \
+ pos = rb_entry_safe(rb_prev(&(pos)->member), typeof(*pos), member))
+
+/*
+ * for_each_rb_free_block_reverse_safe() - safely iterate over an RB tree in reverse order
+ * @pos: the struct type * to use as a loop cursor.
+ * @n: another struct type * to use as temporary storage.
+ * @root: pointer to struct rb_root to iterate.
+ * @member: name of the rb_node field within the struct.
+ */
+#define for_each_rb_free_block_reverse_safe(pos, n, root, member) \
+ for (pos = rb_entry_safe(rb_last(root), typeof(*pos), member), \
+ n = pos ? rb_entry_safe(rb_prev(&(pos)->member), typeof(*pos), member) : NULL; \
+ pos; \
+ pos = n, n = pos ? rb_entry_safe(rb_prev(&(pos)->member), typeof(*pos), member) : NULL)
+
static struct drm_buddy_block *drm_block_alloc(struct drm_buddy *mm,
struct drm_buddy_block *parent,
unsigned int order,
@@ -31,6 +66,8 @@ static struct drm_buddy_block *drm_block_alloc(struct drm_buddy *mm,
block->header |= order;
block->parent = parent;
+ RB_CLEAR_NODE(&block->rb);
+
BUG_ON(block->header & DRM_BUDDY_HEADER_UNUSED);
return block;
}
@@ -41,23 +78,53 @@ static void drm_block_free(struct drm_buddy *mm,
kmem_cache_free(slab_blocks, block);
}
-static void list_insert_sorted(struct drm_buddy *mm,
- struct drm_buddy_block *block)
+static void rbtree_insert(struct drm_buddy *mm,
+ struct drm_buddy_block *block)
{
+ struct rb_root *root = &mm->free_tree[drm_buddy_block_order(block)];
+ struct rb_node **link = &root->rb_node;
+ struct rb_node *parent = NULL;
struct drm_buddy_block *node;
- struct list_head *head;
+ u64 offset;
+
+ offset = drm_buddy_block_offset(block);
- head = &mm->free_list[drm_buddy_block_order(block)];
- if (list_empty(head)) {
- list_add(&block->link, head);
- return;
+ while (*link) {
+ parent = *link;
+ node = rb_entry(parent, struct drm_buddy_block, rb);
+
+ if (offset < drm_buddy_block_offset(node))
+ link = &parent->rb_left;
+ else
+ link = &parent->rb_right;
}
- list_for_each_entry(node, head, link)
- if (drm_buddy_block_offset(block) < drm_buddy_block_offset(node))
- break;
+ rb_link_node(&block->rb, parent, link);
+ rb_insert_color(&block->rb, root);
+}
- __list_add(&block->link, node->link.prev, &node->link);
+static void rbtree_remove(struct drm_buddy *mm,
+ struct drm_buddy_block *block)
+{
+ struct rb_root *root;
+
+ root = &mm->free_tree[drm_buddy_block_order(block)];
+ rb_erase(&block->rb, root);
+
+ RB_CLEAR_NODE(&block->rb);
+}
+
+static inline struct drm_buddy_block *
+rbtree_last_entry(struct drm_buddy *mm, unsigned int order)
+{
+ struct rb_node *node = rb_last(&mm->free_tree[order]);
+
+ return node ? rb_entry(node, struct drm_buddy_block, rb) : NULL;
+}
+
+static bool rbtree_is_empty(struct drm_buddy *mm, unsigned int order)
+{
+ return RB_EMPTY_ROOT(&mm->free_tree[order]);
}
static void clear_reset(struct drm_buddy_block *block)
@@ -70,12 +137,13 @@ static void mark_cleared(struct drm_buddy_block *block)
block->header |= DRM_BUDDY_HEADER_CLEAR;
}
-static void mark_allocated(struct drm_buddy_block *block)
+static void mark_allocated(struct drm_buddy *mm,
+ struct drm_buddy_block *block)
{
block->header &= ~DRM_BUDDY_HEADER_STATE;
block->header |= DRM_BUDDY_ALLOCATED;
- list_del(&block->link);
+ rbtree_remove(mm, block);
}
static void mark_free(struct drm_buddy *mm,
@@ -84,15 +152,16 @@ static void mark_free(struct drm_buddy *mm,
block->header &= ~DRM_BUDDY_HEADER_STATE;
block->header |= DRM_BUDDY_FREE;
- list_insert_sorted(mm, block);
+ rbtree_insert(mm, block);
}
-static void mark_split(struct drm_buddy_block *block)
+static void mark_split(struct drm_buddy *mm,
+ struct drm_buddy_block *block)
{
block->header &= ~DRM_BUDDY_HEADER_STATE;
block->header |= DRM_BUDDY_SPLIT;
- list_del(&block->link);
+ rbtree_remove(mm, block);
}
static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2)
@@ -148,7 +217,7 @@ static unsigned int __drm_buddy_free(struct drm_buddy *mm,
mark_cleared(parent);
}
- list_del(&buddy->link);
+ rbtree_remove(mm, buddy);
if (force_merge && drm_buddy_block_is_clear(buddy))
mm->clear_avail -= drm_buddy_block_size(mm, buddy);
@@ -179,9 +248,11 @@ static int __force_merge(struct drm_buddy *mm,
return -EINVAL;
for (i = min_order - 1; i >= 0; i--) {
- struct drm_buddy_block *block, *prev;
+ struct drm_buddy_block *block, *prev_block, *first_block;
- list_for_each_entry_safe_reverse(block, prev, &mm->free_list[i], link) {
+ first_block = rb_entry(rb_first(&mm->free_tree[i]), struct drm_buddy_block, rb);
+
+ for_each_rb_free_block_reverse_safe(block, prev_block, &mm->free_tree[i], rb) {
struct drm_buddy_block *buddy;
u64 block_start, block_end;
@@ -206,10 +277,14 @@ static int __force_merge(struct drm_buddy *mm,
* block in the next iteration as we would free the
* buddy block as part of the free function.
*/
- if (prev == buddy)
- prev = list_prev_entry(prev, link);
+ if (prev_block && prev_block == buddy) {
+ if (prev_block != first_block)
+ prev_block = rb_entry(rb_prev(&prev_block->rb),
+ struct drm_buddy_block,
+ rb);
+ }
- list_del(&block->link);
+ rbtree_remove(mm, block);
if (drm_buddy_block_is_clear(block))
mm->clear_avail -= drm_buddy_block_size(mm, block);
@@ -258,14 +333,14 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
BUG_ON(mm->max_order > DRM_BUDDY_MAX_ORDER);
- mm->free_list = kmalloc_array(mm->max_order + 1,
- sizeof(struct list_head),
+ mm->free_tree = kmalloc_array(mm->max_order + 1,
+ sizeof(struct rb_root),
GFP_KERNEL);
- if (!mm->free_list)
+ if (!mm->free_tree)
return -ENOMEM;
for (i = 0; i <= mm->max_order; ++i)
- INIT_LIST_HEAD(&mm->free_list[i]);
+ mm->free_tree[i] = RB_ROOT;
mm->n_roots = hweight64(size);
@@ -273,7 +348,7 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
sizeof(struct drm_buddy_block *),
GFP_KERNEL);
if (!mm->roots)
- goto out_free_list;
+ goto out_free_tree;
offset = 0;
i = 0;
@@ -312,8 +387,8 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
while (i--)
drm_block_free(mm, mm->roots[i]);
kfree(mm->roots);
-out_free_list:
- kfree(mm->free_list);
+out_free_tree:
+ kfree(mm->free_tree);
return -ENOMEM;
}
EXPORT_SYMBOL(drm_buddy_init);
@@ -323,7 +398,7 @@ EXPORT_SYMBOL(drm_buddy_init);
*
* @mm: DRM buddy manager to free
*
- * Cleanup memory manager resources and the freelist
+ * Cleanup memory manager resources and the freetree
*/
void drm_buddy_fini(struct drm_buddy *mm)
{
@@ -350,7 +425,7 @@ void drm_buddy_fini(struct drm_buddy *mm)
WARN_ON(mm->avail != mm->size);
kfree(mm->roots);
- kfree(mm->free_list);
+ kfree(mm->free_tree);
}
EXPORT_SYMBOL(drm_buddy_fini);
@@ -383,7 +458,7 @@ static int split_block(struct drm_buddy *mm,
clear_reset(block);
}
- mark_split(block);
+ mark_split(mm, block);
return 0;
}
@@ -433,7 +508,7 @@ void drm_buddy_reset_clear(struct drm_buddy *mm, bool is_clear)
for (i = 0; i <= mm->max_order; ++i) {
struct drm_buddy_block *block;
- list_for_each_entry_reverse(block, &mm->free_list[i], link) {
+ for_each_rb_free_block_reverse(block, &mm->free_tree[i], rb) {
if (is_clear != drm_buddy_block_is_clear(block)) {
if (is_clear) {
mark_cleared(block);
@@ -641,7 +716,7 @@ get_maxblock(struct drm_buddy *mm, unsigned int order,
for (i = order; i <= mm->max_order; ++i) {
struct drm_buddy_block *tmp_block;
- list_for_each_entry_reverse(tmp_block, &mm->free_list[i], link) {
+ for_each_rb_free_block_reverse(tmp_block, &mm->free_tree[i], rb) {
if (block_incompatible(tmp_block, flags))
continue;
@@ -667,7 +742,7 @@ get_maxblock(struct drm_buddy *mm, unsigned int order,
}
static struct drm_buddy_block *
-alloc_from_freelist(struct drm_buddy *mm,
+alloc_from_freetree(struct drm_buddy *mm,
unsigned int order,
unsigned long flags)
{
@@ -684,7 +759,7 @@ alloc_from_freelist(struct drm_buddy *mm,
for (tmp = order; tmp <= mm->max_order; ++tmp) {
struct drm_buddy_block *tmp_block;
- list_for_each_entry_reverse(tmp_block, &mm->free_list[tmp], link) {
+ for_each_rb_free_block_reverse(tmp_block, &mm->free_tree[tmp], rb) {
if (block_incompatible(tmp_block, flags))
continue;
@@ -700,10 +775,8 @@ alloc_from_freelist(struct drm_buddy *mm,
if (!block) {
/* Fallback method */
for (tmp = order; tmp <= mm->max_order; ++tmp) {
- if (!list_empty(&mm->free_list[tmp])) {
- block = list_last_entry(&mm->free_list[tmp],
- struct drm_buddy_block,
- link);
+ if (!rbtree_is_empty(mm, tmp)) {
+ block = rbtree_last_entry(mm, tmp);
if (block)
break;
}
@@ -771,7 +844,7 @@ static int __alloc_range(struct drm_buddy *mm,
if (contains(start, end, block_start, block_end)) {
if (drm_buddy_block_is_free(block)) {
- mark_allocated(block);
+ mark_allocated(mm, block);
total_allocated += drm_buddy_block_size(mm, block);
mm->avail -= drm_buddy_block_size(mm, block);
if (drm_buddy_block_is_clear(block))
@@ -849,7 +922,6 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm,
{
u64 rhs_offset, lhs_offset, lhs_size, filled;
struct drm_buddy_block *block;
- struct list_head *list;
LIST_HEAD(blocks_lhs);
unsigned long pages;
unsigned int order;
@@ -862,11 +934,10 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm,
if (order == 0)
return -ENOSPC;
- list = &mm->free_list[order];
- if (list_empty(list))
+ if (rbtree_is_empty(mm, order))
return -ENOSPC;
- list_for_each_entry_reverse(block, list, link) {
+ for_each_rb_free_block_reverse(block, &mm->free_tree[order], rb) {
/* Allocate blocks traversing RHS */
rhs_offset = drm_buddy_block_offset(block);
err = __drm_buddy_alloc_range(mm, rhs_offset, size,
@@ -976,7 +1047,7 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
list_add(&block->tmp_link, &dfs);
err = __alloc_range(mm, &dfs, new_start, new_size, blocks, NULL);
if (err) {
- mark_allocated(block);
+ mark_allocated(mm, block);
mm->avail -= drm_buddy_block_size(mm, block);
if (drm_buddy_block_is_clear(block))
mm->clear_avail -= drm_buddy_block_size(mm, block);
@@ -999,8 +1070,8 @@ __drm_buddy_alloc_blocks(struct drm_buddy *mm,
return __drm_buddy_alloc_range_bias(mm, start, end,
order, flags);
else
- /* Allocate from freelist */
- return alloc_from_freelist(mm, order, flags);
+ /* Allocate from freetree */
+ return alloc_from_freetree(mm, order, flags);
}
/**
@@ -1017,8 +1088,8 @@ __drm_buddy_alloc_blocks(struct drm_buddy *mm,
* alloc_range_bias() called on range limitations, which traverses
* the tree and returns the desired block.
*
- * alloc_from_freelist() called when *no* range restrictions
- * are enforced, which picks the block from the freelist.
+ * alloc_from_freetree() called when *no* range restrictions
+ * are enforced, which picks the block from the freetree.
*
* Returns:
* 0 on success, error code on failure.
@@ -1120,7 +1191,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
}
} while (1);
- mark_allocated(block);
+ mark_allocated(mm, block);
mm->avail -= drm_buddy_block_size(mm, block);
if (drm_buddy_block_is_clear(block))
mm->clear_avail -= drm_buddy_block_size(mm, block);
@@ -1204,7 +1275,7 @@ void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p)
struct drm_buddy_block *block;
u64 count = 0, free;
- list_for_each_entry(block, &mm->free_list[order], link) {
+ for_each_rb_free_block(block, &mm->free_tree[order], rb) {
BUG_ON(!drm_buddy_block_is_free(block));
count++;
}
diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
index 513837632b7d..091823592034 100644
--- a/include/drm/drm_buddy.h
+++ b/include/drm/drm_buddy.h
@@ -10,6 +10,7 @@
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/sched.h>
+#include <linux/rbtree.h>
#include <drm/drm_print.h>
@@ -53,7 +54,11 @@ struct drm_buddy_block {
* a list, if so desired. As soon as the block is freed with
* drm_buddy_free* ownership is given back to the mm.
*/
- struct list_head link;
+ union {
+ struct rb_node rb;
+ struct list_head link;
+ };
+
struct list_head tmp_link;
};
@@ -68,7 +73,7 @@ struct drm_buddy_block {
*/
struct drm_buddy {
/* Maintain a free list for each order. */
- struct list_head *free_list;
+ struct rb_root *free_tree;
/*
* Maintain explicit binary tree(s) to track the allocation of the
--
2.34.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH v3 2/2] drm/buddy: Separate clear and dirty free block trees
2025-08-21 15:06 [PATCH v3 1/2] drm/buddy: Optimize free block management with RB tree Arunpravin Paneer Selvam
@ 2025-08-21 15:06 ` Arunpravin Paneer Selvam
2025-08-22 12:02 ` Matthew Auld
2025-08-21 17:09 ` ✗ CI.checkpatch: warning for series starting with [v3,1/2] drm/buddy: Optimize free block management with RB tree Patchwork
` (2 subsequent siblings)
3 siblings, 1 reply; 10+ messages in thread
From: Arunpravin Paneer Selvam @ 2025-08-21 15:06 UTC (permalink / raw)
To: christian.koenig, matthew.auld, dri-devel, amd-gfx, intel-gfx,
intel-xe
Cc: alexander.deucher, Arunpravin Paneer Selvam
Maintain two separate RB trees per order - one for clear (zeroed) blocks
and another for dirty (uncleared) blocks. This separation improves
code clarity and makes it more obvious which tree is being searched
during allocation. It also improves scalability and efficiency when
searching for a specific type of block, avoiding unnecessary checks
and making the allocator more predictable under fragmentation.
The changes have been validated using the existing drm_buddy_test
KUnit test cases, along with selected graphics workloads,
to ensure correctness and avoid regressions.
v2: Missed adding the suggested-by tag. Added it in v2.
v3(Matthew):
- Remove the double underscores from the internal functions.
- Rename the internal functions to have less generic names.
- Fix the error handling code.
- Pass tree argument for the tree macro.
- Use the existing dirty/free bit instead of new tree field.
- Make free_trees[] instead of clear_tree and dirty_tree for
more cleaner approach.
Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Suggested-by: Matthew Auld <matthew.auld@intel.com>
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4260
---
drivers/gpu/drm/drm_buddy.c | 342 ++++++++++++++++++++++--------------
include/drm/drm_buddy.h | 8 +-
2 files changed, 215 insertions(+), 135 deletions(-)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 92226a46cc2c..f57c384889a9 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -14,6 +14,9 @@
static struct kmem_cache *slab_blocks;
+#define for_each_free_tree(tree) \
+ for ((tree) = CLEAR_TREE; (tree) < MAX_FREE_TREES; (tree)++)
+
/*
* for_each_rb_free_block() - iterate over an RB tree in order
* @pos: the struct type * to use as a loop cursor
@@ -78,22 +81,77 @@ static void drm_block_free(struct drm_buddy *mm,
kmem_cache_free(slab_blocks, block);
}
+static inline struct rb_root *
+get_root(struct drm_buddy *mm,
+ unsigned int order,
+ enum free_tree tree)
+{
+ if (tree == CLEAR_TREE)
+ return &mm->free_trees[CLEAR_TREE][order];
+ else
+ return &mm->free_trees[DIRTY_TREE][order];
+}
+
+static inline enum free_tree
+get_tree_for_block(struct drm_buddy_block *block)
+{
+ return drm_buddy_block_is_clear(block) ? CLEAR_TREE : DIRTY_TREE;
+}
+
+static inline enum free_tree
+get_tree_for_flags(unsigned long flags)
+{
+ return (flags & DRM_BUDDY_CLEAR_ALLOCATION) ? CLEAR_TREE : DIRTY_TREE;
+}
+
+static inline struct drm_buddy_block *
+rbtree_get_free_block(struct rb_node *node)
+{
+ return node ? rb_entry(node, struct drm_buddy_block, rb) : NULL;
+}
+
+static inline struct drm_buddy_block *
+rbtree_prev_free_block(struct rb_node *node)
+{
+ return rbtree_get_free_block(rb_prev(node));
+}
+
+static inline struct drm_buddy_block *
+rbtree_first_free_block(struct rb_root *root)
+{
+ return rbtree_get_free_block(rb_first(root));
+}
+
+static inline struct drm_buddy_block *
+rbtree_last_free_block(struct rb_root *root)
+{
+ return rbtree_get_free_block(rb_last(root));
+}
+
+static inline bool rbtree_is_empty(struct rb_root *root)
+{
+ return RB_EMPTY_ROOT(root);
+}
+
static void rbtree_insert(struct drm_buddy *mm,
- struct drm_buddy_block *block)
+ struct drm_buddy_block *block,
+ enum free_tree tree)
{
- struct rb_root *root = &mm->free_tree[drm_buddy_block_order(block)];
- struct rb_node **link = &root->rb_node;
- struct rb_node *parent = NULL;
+ struct rb_node **link, *parent = NULL;
struct drm_buddy_block *node;
- u64 offset;
+ struct rb_root *root;
+ unsigned int order;
+
+ order = drm_buddy_block_order(block);
- offset = drm_buddy_block_offset(block);
+ root = get_root(mm, order, tree);
+ link = &root->rb_node;
while (*link) {
parent = *link;
- node = rb_entry(parent, struct drm_buddy_block, rb);
+ node = rbtree_get_free_block(parent);
- if (offset < drm_buddy_block_offset(node))
+ if (drm_buddy_block_offset(block) < drm_buddy_block_offset(node))
link = &parent->rb_left;
else
link = &parent->rb_right;
@@ -106,27 +164,19 @@ static void rbtree_insert(struct drm_buddy *mm,
static void rbtree_remove(struct drm_buddy *mm,
struct drm_buddy_block *block)
{
+ unsigned int order = drm_buddy_block_order(block);
+ enum free_tree tree;
struct rb_root *root;
- root = &mm->free_tree[drm_buddy_block_order(block)];
+ tree = drm_buddy_block_is_clear(block) ?
+ CLEAR_TREE : DIRTY_TREE;
+
+ root = get_root(mm, order, tree);
rb_erase(&block->rb, root);
RB_CLEAR_NODE(&block->rb);
}
-static inline struct drm_buddy_block *
-rbtree_last_entry(struct drm_buddy *mm, unsigned int order)
-{
- struct rb_node *node = rb_last(&mm->free_tree[order]);
-
- return node ? rb_entry(node, struct drm_buddy_block, rb) : NULL;
-}
-
-static bool rbtree_is_empty(struct drm_buddy *mm, unsigned int order)
-{
- return RB_EMPTY_ROOT(&mm->free_tree[order]);
-}
-
static void clear_reset(struct drm_buddy_block *block)
{
block->header &= ~DRM_BUDDY_HEADER_CLEAR;
@@ -149,10 +199,14 @@ static void mark_allocated(struct drm_buddy *mm,
static void mark_free(struct drm_buddy *mm,
struct drm_buddy_block *block)
{
+ enum free_tree tree;
+
block->header &= ~DRM_BUDDY_HEADER_STATE;
block->header |= DRM_BUDDY_FREE;
- rbtree_insert(mm, block);
+ tree = get_tree_for_block(block);
+
+ rbtree_insert(mm, block, tree);
}
static void mark_split(struct drm_buddy *mm,
@@ -238,6 +292,7 @@ static int __force_merge(struct drm_buddy *mm,
u64 end,
unsigned int min_order)
{
+ enum free_tree tree;
unsigned int order;
int i;
@@ -247,50 +302,49 @@ static int __force_merge(struct drm_buddy *mm,
if (min_order > mm->max_order)
return -EINVAL;
- for (i = min_order - 1; i >= 0; i--) {
- struct drm_buddy_block *block, *prev_block, *first_block;
-
- first_block = rb_entry(rb_first(&mm->free_tree[i]), struct drm_buddy_block, rb);
+ for_each_free_tree(tree) {
+ for (i = min_order - 1; i >= 0; i--) {
+ struct rb_root *root = get_root(mm, i, tree);
+ struct drm_buddy_block *block, *prev_block;
- for_each_rb_free_block_reverse_safe(block, prev_block, &mm->free_tree[i], rb) {
- struct drm_buddy_block *buddy;
- u64 block_start, block_end;
+ for_each_rb_free_block_reverse_safe(block, prev_block, root, rb) {
+ struct drm_buddy_block *buddy;
+ u64 block_start, block_end;
- if (!block->parent)
- continue;
+ if (!block->parent)
+ continue;
- block_start = drm_buddy_block_offset(block);
- block_end = block_start + drm_buddy_block_size(mm, block) - 1;
+ block_start = drm_buddy_block_offset(block);
+ block_end = block_start + drm_buddy_block_size(mm, block) - 1;
- if (!contains(start, end, block_start, block_end))
- continue;
+ if (!contains(start, end, block_start, block_end))
+ continue;
- buddy = __get_buddy(block);
- if (!drm_buddy_block_is_free(buddy))
- continue;
+ buddy = __get_buddy(block);
+ if (!drm_buddy_block_is_free(buddy))
+ continue;
- WARN_ON(drm_buddy_block_is_clear(block) ==
- drm_buddy_block_is_clear(buddy));
+ WARN_ON(drm_buddy_block_is_clear(block) ==
+ drm_buddy_block_is_clear(buddy));
- /*
- * If the prev block is same as buddy, don't access the
- * block in the next iteration as we would free the
- * buddy block as part of the free function.
- */
- if (prev_block && prev_block == buddy) {
- if (prev_block != first_block)
- prev_block = rb_entry(rb_prev(&prev_block->rb),
- struct drm_buddy_block,
- rb);
- }
+ /*
+ * If the prev block is same as buddy, don't access the
+ * block in the next iteration as we would free the
+ * buddy block as part of the free function.
+ */
+				if (prev_block && prev_block == buddy) {
+					prev_block = prev_block != rbtree_first_free_block(root) ?
+						     rbtree_prev_free_block(&prev_block->rb) : NULL;
+				}
- rbtree_remove(mm, block);
- if (drm_buddy_block_is_clear(block))
- mm->clear_avail -= drm_buddy_block_size(mm, block);
+ rbtree_remove(mm, block);
+ if (drm_buddy_block_is_clear(block))
+ mm->clear_avail -= drm_buddy_block_size(mm, block);
- order = __drm_buddy_free(mm, block, true);
- if (order >= min_order)
- return 0;
+ order = __drm_buddy_free(mm, block, true);
+ if (order >= min_order)
+ return 0;
+ }
}
}
@@ -311,7 +365,7 @@ static int __force_merge(struct drm_buddy *mm,
*/
int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
{
- unsigned int i;
+ unsigned int i, j;
u64 offset;
if (size < chunk_size)
@@ -333,14 +387,16 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
BUG_ON(mm->max_order > DRM_BUDDY_MAX_ORDER);
- mm->free_tree = kmalloc_array(mm->max_order + 1,
- sizeof(struct rb_root),
- GFP_KERNEL);
- if (!mm->free_tree)
- return -ENOMEM;
+	for (j = 0; j < MAX_FREE_TREES; j++) {
+		mm->free_trees[j] = kmalloc_array(mm->max_order + 1,
+						  sizeof(struct rb_root),
+						  GFP_KERNEL);
+		if (!mm->free_trees[j])
+			goto out_free_tree;
-	for (i = 0; i <= mm->max_order; ++i)
-		mm->free_tree[i] = RB_ROOT;
+		for (i = 0; i <= mm->max_order; ++i)
+			mm->free_trees[j][i] = RB_ROOT;
+	}
 	mm->n_roots = hweight64(size);
@@ -388,7 +444,8 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
 		drm_block_free(mm, mm->roots[i]);
 	kfree(mm->roots);
 out_free_tree:
-	kfree(mm->free_tree);
+	while (j--)
+		kfree(mm->free_trees[j]);
return -ENOMEM;
}
EXPORT_SYMBOL(drm_buddy_init);
@@ -424,8 +481,9 @@ void drm_buddy_fini(struct drm_buddy *mm)
WARN_ON(mm->avail != mm->size);
+ for (i = 0; i < MAX_FREE_TREES; i++)
+ kfree(mm->free_trees[i]);
kfree(mm->roots);
- kfree(mm->free_tree);
}
EXPORT_SYMBOL(drm_buddy_fini);
@@ -449,15 +507,15 @@ static int split_block(struct drm_buddy *mm,
return -ENOMEM;
}
- mark_free(mm, block->left);
- mark_free(mm, block->right);
-
if (drm_buddy_block_is_clear(block)) {
mark_cleared(block->left);
mark_cleared(block->right);
clear_reset(block);
}
+ mark_free(mm, block->left);
+ mark_free(mm, block->right);
+
mark_split(mm, block);
return 0;
@@ -491,6 +549,7 @@ EXPORT_SYMBOL(drm_get_buddy);
*/
void drm_buddy_reset_clear(struct drm_buddy *mm, bool is_clear)
{
+ enum free_tree src_tree, dst_tree;
u64 root_size, size, start;
unsigned int order;
int i;
@@ -505,19 +564,25 @@ void drm_buddy_reset_clear(struct drm_buddy *mm, bool is_clear)
 		size -= root_size;
 	}
+	src_tree = is_clear ? DIRTY_TREE : CLEAR_TREE;
+	dst_tree = is_clear ? CLEAR_TREE : DIRTY_TREE;
+
 	for (i = 0; i <= mm->max_order; ++i) {
+		struct rb_root *root = get_root(mm, i, src_tree);
+		struct drm_buddy_block *tmp;
 		struct drm_buddy_block *block;
-		for_each_rb_free_block_reverse(block, &mm->free_tree[i], rb) {
-			if (is_clear != drm_buddy_block_is_clear(block)) {
-				if (is_clear) {
-					mark_cleared(block);
-					mm->clear_avail += drm_buddy_block_size(mm, block);
-				} else {
-					clear_reset(block);
-					mm->clear_avail -= drm_buddy_block_size(mm, block);
-				}
+		for_each_rb_free_block_reverse_safe(block, tmp, root, rb) {
+ rbtree_remove(mm, block);
+ if (is_clear) {
+ mark_cleared(block);
+ mm->clear_avail += drm_buddy_block_size(mm, block);
+ } else {
+ clear_reset(block);
+ mm->clear_avail -= drm_buddy_block_size(mm, block);
}
+
+ rbtree_insert(mm, block, dst_tree);
}
}
}
@@ -707,26 +771,22 @@ __drm_buddy_alloc_range_bias(struct drm_buddy *mm,
}
static struct drm_buddy_block *
-get_maxblock(struct drm_buddy *mm, unsigned int order,
- unsigned long flags)
+get_maxblock(struct drm_buddy *mm,
+ unsigned int order,
+ enum free_tree tree)
{
struct drm_buddy_block *max_block = NULL, *block = NULL;
+ struct rb_root *root;
unsigned int i;
for (i = order; i <= mm->max_order; ++i) {
- struct drm_buddy_block *tmp_block;
-
- for_each_rb_free_block_reverse(tmp_block, &mm->free_tree[i], rb) {
- if (block_incompatible(tmp_block, flags))
+ root = get_root(mm, i, tree);
+ if (!rbtree_is_empty(root)) {
+ block = rbtree_last_free_block(root);
+ if (!block)
continue;
-
- block = tmp_block;
- break;
}
- if (!block)
- continue;
-
if (!max_block) {
max_block = block;
continue;
@@ -747,36 +807,38 @@ alloc_from_freetree(struct drm_buddy *mm,
unsigned long flags)
{
struct drm_buddy_block *block = NULL;
+ struct rb_root *root;
+ enum free_tree tree;
unsigned int tmp;
int err;
+ tree = get_tree_for_flags(flags);
+
if (flags & DRM_BUDDY_TOPDOWN_ALLOCATION) {
- block = get_maxblock(mm, order, flags);
+ block = get_maxblock(mm, order, tree);
if (block)
/* Store the obtained block order */
tmp = drm_buddy_block_order(block);
} else {
for (tmp = order; tmp <= mm->max_order; ++tmp) {
- struct drm_buddy_block *tmp_block;
-
- for_each_rb_free_block_reverse(tmp_block, &mm->free_tree[tmp], rb) {
- if (block_incompatible(tmp_block, flags))
- continue;
-
- block = tmp_block;
- break;
+ /* Get RB tree root for this order and tree */
+ root = get_root(mm, tmp, tree);
+ if (!rbtree_is_empty(root)) {
+ block = rbtree_last_free_block(root);
+ if (block)
+ break;
}
-
- if (block)
- break;
}
}
if (!block) {
- /* Fallback method */
+ /* Try allocating from the other tree */
+ tree = (tree == CLEAR_TREE) ? DIRTY_TREE : CLEAR_TREE;
+
for (tmp = order; tmp <= mm->max_order; ++tmp) {
- if (!rbtree_is_empty(mm, tmp)) {
- block = rbtree_last_entry(mm, tmp);
+ root = get_root(mm, tmp, tree);
+ if (!rbtree_is_empty(root)) {
+ block = rbtree_last_free_block(root);
if (block)
break;
}
@@ -923,6 +985,7 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm,
u64 rhs_offset, lhs_offset, lhs_size, filled;
struct drm_buddy_block *block;
LIST_HEAD(blocks_lhs);
+ enum free_tree tree;
unsigned long pages;
unsigned int order;
u64 modify_size;
@@ -934,34 +997,39 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm,
if (order == 0)
return -ENOSPC;
- if (rbtree_is_empty(mm, order))
+ if (rbtree_is_empty(get_root(mm, order, CLEAR_TREE)) &&
+ rbtree_is_empty(get_root(mm, order, DIRTY_TREE)))
return -ENOSPC;
- for_each_rb_free_block_reverse(block, &mm->free_tree[order], rb) {
- /* Allocate blocks traversing RHS */
- rhs_offset = drm_buddy_block_offset(block);
- err = __drm_buddy_alloc_range(mm, rhs_offset, size,
- &filled, blocks);
- if (!err || err != -ENOSPC)
- return err;
-
- lhs_size = max((size - filled), min_block_size);
- if (!IS_ALIGNED(lhs_size, min_block_size))
- lhs_size = round_up(lhs_size, min_block_size);
-
- /* Allocate blocks traversing LHS */
- lhs_offset = drm_buddy_block_offset(block) - lhs_size;
- err = __drm_buddy_alloc_range(mm, lhs_offset, lhs_size,
- NULL, &blocks_lhs);
- if (!err) {
- list_splice(&blocks_lhs, blocks);
- return 0;
- } else if (err != -ENOSPC) {
+ for_each_free_tree(tree) {
+ struct rb_root *root = get_root(mm, order, tree);
+
+ for_each_rb_free_block_reverse(block, root, rb) {
+ /* Allocate blocks traversing RHS */
+ rhs_offset = drm_buddy_block_offset(block);
+ err = __drm_buddy_alloc_range(mm, rhs_offset, size,
+ &filled, blocks);
+ if (!err || err != -ENOSPC)
+ return err;
+
+ lhs_size = max((size - filled), min_block_size);
+ if (!IS_ALIGNED(lhs_size, min_block_size))
+ lhs_size = round_up(lhs_size, min_block_size);
+
+ /* Allocate blocks traversing LHS */
+ lhs_offset = drm_buddy_block_offset(block) - lhs_size;
+ err = __drm_buddy_alloc_range(mm, lhs_offset, lhs_size,
+ NULL, &blocks_lhs);
+ if (!err) {
+ list_splice(&blocks_lhs, blocks);
+ return 0;
+ } else if (err != -ENOSPC) {
+ drm_buddy_free_list_internal(mm, blocks);
+ return err;
+ }
+ /* Free blocks for the next iteration */
drm_buddy_free_list_internal(mm, blocks);
- return err;
}
- /* Free blocks for the next iteration */
- drm_buddy_free_list_internal(mm, blocks);
}
return -ENOSPC;
@@ -1266,6 +1334,7 @@ EXPORT_SYMBOL(drm_buddy_block_print);
*/
void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p)
{
+ enum free_tree tree;
int order;
drm_printf(p, "chunk_size: %lluKiB, total: %lluMiB, free: %lluMiB, clear_free: %lluMiB\n",
@@ -1273,11 +1342,16 @@ void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p)
for (order = mm->max_order; order >= 0; order--) {
struct drm_buddy_block *block;
+ struct rb_root *root;
u64 count = 0, free;
- for_each_rb_free_block(block, &mm->free_tree[order], rb) {
- BUG_ON(!drm_buddy_block_is_free(block));
- count++;
+ for_each_free_tree(tree) {
+ root = get_root(mm, order, tree);
+
+ for_each_rb_free_block(block, root, rb) {
+ BUG_ON(!drm_buddy_block_is_free(block));
+ count++;
+ }
}
drm_printf(p, "order-%2d ", order);
diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
index 091823592034..2fc1cc7b78bf 100644
--- a/include/drm/drm_buddy.h
+++ b/include/drm/drm_buddy.h
@@ -14,6 +14,12 @@
#include <drm/drm_print.h>
+enum free_tree {
+ CLEAR_TREE = 0,
+ DIRTY_TREE,
+ MAX_FREE_TREES,
+};
+
#define range_overflows(start, size, max) ({ \
typeof(start) start__ = (start); \
typeof(size) size__ = (size); \
@@ -73,7 +79,7 @@ struct drm_buddy_block {
*/
struct drm_buddy {
/* Maintain a free list for each order. */
- struct rb_root *free_tree;
+ struct rb_root *free_trees[MAX_FREE_TREES];
/*
* Maintain explicit binary tree(s) to track the allocation of the
--
2.34.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* ✗ CI.checkpatch: warning for series starting with [v3,1/2] drm/buddy: Optimize free block management with RB tree
2025-08-21 15:06 [PATCH v3 1/2] drm/buddy: Optimize free block management with RB tree Arunpravin Paneer Selvam
2025-08-21 15:06 ` [PATCH v3 2/2] drm/buddy: Separate clear and dirty free block trees Arunpravin Paneer Selvam
@ 2025-08-21 17:09 ` Patchwork
2025-08-21 17:10 ` ✗ CI.KUnit: failure " Patchwork
2025-08-22 8:37 ` [PATCH v3 1/2] " Matthew Auld
3 siblings, 0 replies; 10+ messages in thread
From: Patchwork @ 2025-08-21 17:09 UTC (permalink / raw)
To: Arunpravin Paneer Selvam; +Cc: intel-xe
== Series Details ==
Series: series starting with [v3,1/2] drm/buddy: Optimize free block management with RB tree
URL : https://patchwork.freedesktop.org/series/153302/
State : warning
== Summary ==
+ KERNEL=/kernel
+ git clone https://gitlab.freedesktop.org/drm/maintainer-tools mt
Cloning into 'mt'...
warning: redirecting to https://gitlab.freedesktop.org/drm/maintainer-tools.git/
+ git -C mt rev-list -n1 origin/master
553439844b6500767ce8aef522cfe9fbb7ece541
+ cd /kernel
+ git config --global --add safe.directory /kernel
+ git log -n1
commit f439559ab901041d7ec24bbe403502e8f2f0b61c
Author: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Date: Thu Aug 21 20:36:41 2025 +0530
drm/buddy: Separate clear and dirty free block trees
Maintain two separate RB trees per order - one for clear (zeroed) blocks
and another for dirty (uncleared) blocks. This separation improves
code clarity and makes it more obvious which tree is being searched
during allocation. It also improves scalability and efficiency when
searching for a specific type of block, avoiding unnecessary checks
and making the allocator more predictable under fragmentation.
The changes have been validated using the existing drm_buddy_test
KUnit test cases, along with selected graphics workloads,
to ensure correctness and avoid regressions.
v2: Missed adding the suggested-by tag. Added it in v2.
v3(Matthew):
- Remove the double underscores from the internal functions.
- Rename the internal functions to have less generic names.
- Fix the error handling code.
- Pass tree argument for the tree macro.
- Use the existing dirty/free bit instead of new tree field.
- Make free_trees[] instead of clear_tree and dirty_tree for
a cleaner approach.
Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Suggested-by: Matthew Auld <matthew.auld@intel.com>
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4260
+ /mt/dim checkpatch 633a37934680cad76b742422d1b955d4cdde5189 drm-intel
9034e22e8362 drm/buddy: Optimize free block management with RB tree
-:44: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'pos' - possible side-effects?
#44: FILE: drivers/gpu/drm/drm_buddy.c:23:
+#define for_each_rb_free_block(pos, root, member) \
+ for (pos = rb_entry_safe(rb_first(root), typeof(*pos), member); \
+ pos; \
+ pos = rb_entry_safe(rb_next(&(pos)->member), typeof(*pos), member))
-:44: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'member' - possible side-effects?
#44: FILE: drivers/gpu/drm/drm_buddy.c:23:
+#define for_each_rb_free_block(pos, root, member) \
+ for (pos = rb_entry_safe(rb_first(root), typeof(*pos), member); \
+ pos; \
+ pos = rb_entry_safe(rb_next(&(pos)->member), typeof(*pos), member))
-:55: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'pos' - possible side-effects?
#55: FILE: drivers/gpu/drm/drm_buddy.c:34:
+#define for_each_rb_free_block_reverse(pos, root, member) \
+ for (pos = rb_entry_safe(rb_last(root), typeof(*pos), member); \
+ pos; \
+ pos = rb_entry_safe(rb_prev(&(pos)->member), typeof(*pos), member))
-:55: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'member' - possible side-effects?
#55: FILE: drivers/gpu/drm/drm_buddy.c:34:
+#define for_each_rb_free_block_reverse(pos, root, member) \
+ for (pos = rb_entry_safe(rb_last(root), typeof(*pos), member); \
+ pos; \
+ pos = rb_entry_safe(rb_prev(&(pos)->member), typeof(*pos), member))
-:67: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'pos' - possible side-effects?
#67: FILE: drivers/gpu/drm/drm_buddy.c:46:
+#define for_each_rb_free_block_reverse_safe(pos, n, root, member) \
+ for (pos = rb_entry_safe(rb_last(root), typeof(*pos), member), \
+ n = pos ? rb_entry_safe(rb_prev(&(pos)->member), typeof(*pos), member) : NULL; \
+ pos; \
+ pos = n, n = pos ? rb_entry_safe(rb_prev(&(pos)->member), typeof(*pos), member) : NULL)
-:67: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'n' - possible side-effects?
#67: FILE: drivers/gpu/drm/drm_buddy.c:46:
+#define for_each_rb_free_block_reverse_safe(pos, n, root, member) \
+ for (pos = rb_entry_safe(rb_last(root), typeof(*pos), member), \
+ n = pos ? rb_entry_safe(rb_prev(&(pos)->member), typeof(*pos), member) : NULL; \
+ pos; \
+ pos = n, n = pos ? rb_entry_safe(rb_prev(&(pos)->member), typeof(*pos), member) : NULL)
-:67: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'member' - possible side-effects?
#67: FILE: drivers/gpu/drm/drm_buddy.c:46:
+#define for_each_rb_free_block_reverse_safe(pos, n, root, member) \
+ for (pos = rb_entry_safe(rb_last(root), typeof(*pos), member), \
+ n = pos ? rb_entry_safe(rb_prev(&(pos)->member), typeof(*pos), member) : NULL; \
+ pos; \
+ pos = n, n = pos ? rb_entry_safe(rb_prev(&(pos)->member), typeof(*pos), member) : NULL)
total: 0 errors, 0 warnings, 7 checks, 388 lines checked
f439559ab901 drm/buddy: Separate clear and dirty free block trees
-:39: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'tree' - possible side-effects?
#39: FILE: drivers/gpu/drm/drm_buddy.c:17:
+#define for_each_free_tree(tree) \
+ for ((tree) = CLEAR_TREE; (tree) < MAX_FREE_TREES; (tree)++)
-:254: WARNING:LONG_LINE: line length of 101 exceeds 100 columns
#254: FILE: drivers/gpu/drm/drm_buddy.c:337:
+ prev_block = rbtree_prev_free_block(&prev_block->rb);
-:572: WARNING:AVOID_BUG: Do not crash the kernel unless it is absolutely unavoidable--use WARN_ON_ONCE() plus recovery code (if feasible) instead of BUG() or variants
#572: FILE: drivers/gpu/drm/drm_buddy.c:1352:
+ BUG_ON(!drm_buddy_block_is_free(block));
total: 0 errors, 2 warnings, 1 checks, 544 lines checked
^ permalink raw reply [flat|nested] 10+ messages in thread
* ✗ CI.KUnit: failure for series starting with [v3,1/2] drm/buddy: Optimize free block management with RB tree
2025-08-21 15:06 [PATCH v3 1/2] drm/buddy: Optimize free block management with RB tree Arunpravin Paneer Selvam
2025-08-21 15:06 ` [PATCH v3 2/2] drm/buddy: Separate clear and dirty free block trees Arunpravin Paneer Selvam
2025-08-21 17:09 ` ✗ CI.checkpatch: warning for series starting with [v3,1/2] drm/buddy: Optimize free block management with RB tree Patchwork
@ 2025-08-21 17:10 ` Patchwork
2025-08-22 8:37 ` [PATCH v3 1/2] " Matthew Auld
3 siblings, 0 replies; 10+ messages in thread
From: Patchwork @ 2025-08-21 17:10 UTC (permalink / raw)
To: Arunpravin Paneer Selvam; +Cc: intel-xe
== Series Details ==
Series: series starting with [v3,1/2] drm/buddy: Optimize free block management with RB tree
URL : https://patchwork.freedesktop.org/series/153302/
State : failure
== Summary ==
+ trap cleanup EXIT
+ /kernel/tools/testing/kunit/kunit.py run --kunitconfig /kernel/drivers/gpu/drm/xe/.kunitconfig
[17:09:27] Configuring KUnit Kernel ...
Generating .config ...
Populating config with:
$ make ARCH=um O=.kunit olddefconfig
[17:09:31] Building KUnit Kernel ...
Populating config with:
$ make ARCH=um O=.kunit olddefconfig
Building with:
$ make all compile_commands.json scripts_gdb ARCH=um O=.kunit --jobs=48
[17:10:00] Starting KUnit Kernel (1/1)...
[17:10:00] ============================================================
Running tests with:
$ .kunit/linux kunit.enable=1 mem=1G console=tty kunit_shutdown=halt
[17:10:00] ================== guc_buf (11 subtests) ===================
[17:10:00] [PASSED] test_smallest
[17:10:00] [PASSED] test_largest
[17:10:00] [PASSED] test_granular
[17:10:00] [PASSED] test_unique
[17:10:00] [PASSED] test_overlap
[17:10:00] [PASSED] test_reusable
[17:10:00] [PASSED] test_too_big
[17:10:00] [PASSED] test_flush
[17:10:00] [PASSED] test_lookup
[17:10:00] [PASSED] test_data
[17:10:00] [PASSED] test_class
[17:10:00] ===================== [PASSED] guc_buf =====================
[17:10:00] =================== guc_dbm (7 subtests) ===================
[17:10:00] [PASSED] test_empty
[17:10:00] [PASSED] test_default
[17:10:00] ======================== test_size ========================
[17:10:00] [PASSED] 4
[17:10:00] [PASSED] 8
[17:10:00] [PASSED] 32
[17:10:00] [PASSED] 256
[17:10:00] ==================== [PASSED] test_size ====================
[17:10:00] ======================= test_reuse ========================
[17:10:00] [PASSED] 4
[17:10:00] [PASSED] 8
[17:10:00] [PASSED] 32
[17:10:00] [PASSED] 256
[17:10:00] =================== [PASSED] test_reuse ====================
[17:10:00] =================== test_range_overlap ====================
[17:10:00] [PASSED] 4
[17:10:00] [PASSED] 8
[17:10:00] [PASSED] 32
[17:10:00] [PASSED] 256
[17:10:00] =============== [PASSED] test_range_overlap ================
[17:10:00] =================== test_range_compact ====================
[17:10:00] [PASSED] 4
[17:10:00] [PASSED] 8
[17:10:00] [PASSED] 32
[17:10:00] [PASSED] 256
[17:10:00] =============== [PASSED] test_range_compact ================
[17:10:00] ==================== test_range_spare =====================
[17:10:00] [PASSED] 4
[17:10:00] [PASSED] 8
[17:10:00] [PASSED] 32
[17:10:00] [PASSED] 256
[17:10:00] ================ [PASSED] test_range_spare =================
[17:10:00] ===================== [PASSED] guc_dbm =====================
[17:10:00] =================== guc_idm (6 subtests) ===================
[17:10:00] [PASSED] bad_init
[17:10:00] [PASSED] no_init
[17:10:00] [PASSED] init_fini
[17:10:00] [PASSED] check_used
[17:10:00] [PASSED] check_quota
[17:10:00] [PASSED] check_all
[17:10:00] ===================== [PASSED] guc_idm =====================
[17:10:00] ================== no_relay (3 subtests) ===================
[17:10:00] [PASSED] xe_drops_guc2pf_if_not_ready
[17:10:00] [PASSED] xe_drops_guc2vf_if_not_ready
[17:10:00] [PASSED] xe_rejects_send_if_not_ready
[17:10:00] ==================== [PASSED] no_relay =====================
[17:10:00] ================== pf_relay (14 subtests) ==================
[17:10:00] [PASSED] pf_rejects_guc2pf_too_short
[17:10:00] [PASSED] pf_rejects_guc2pf_too_long
[17:10:00] [PASSED] pf_rejects_guc2pf_no_payload
[17:10:00] [PASSED] pf_fails_no_payload
[17:10:00] [PASSED] pf_fails_bad_origin
[17:10:00] [PASSED] pf_fails_bad_type
[17:10:00] [PASSED] pf_txn_reports_error
[17:10:00] [PASSED] pf_txn_sends_pf2guc
[17:10:00] [PASSED] pf_sends_pf2guc
[17:10:00] [SKIPPED] pf_loopback_nop
[17:10:00] [SKIPPED] pf_loopback_echo
[17:10:00] [SKIPPED] pf_loopback_fail
[17:10:00] [SKIPPED] pf_loopback_busy
[17:10:00] [SKIPPED] pf_loopback_retry
[17:10:00] ==================== [PASSED] pf_relay =====================
[17:10:00] ================== vf_relay (3 subtests) ===================
[17:10:00] [PASSED] vf_rejects_guc2vf_too_short
[17:10:00] [PASSED] vf_rejects_guc2vf_too_long
[17:10:00] [PASSED] vf_rejects_guc2vf_no_payload
[17:10:00] ==================== [PASSED] vf_relay =====================
[17:10:00] ===================== lmtt (1 subtest) =====================
[17:10:00] ======================== test_ops =========================
[17:10:00] [PASSED] 2-level
[17:10:00] [PASSED] multi-level
[17:10:00] ==================== [PASSED] test_ops =====================
[17:10:00] ====================== [PASSED] lmtt =======================
[17:10:00] ================= pf_service (11 subtests) =================
[17:10:00] [PASSED] pf_negotiate_any
[17:10:00] [PASSED] pf_negotiate_base_match
[17:10:00] [PASSED] pf_negotiate_base_newer
[17:10:00] [PASSED] pf_negotiate_base_next
[17:10:00] [SKIPPED] pf_negotiate_base_older
[17:10:00] [PASSED] pf_negotiate_base_prev
[17:10:00] [PASSED] pf_negotiate_latest_match
[17:10:00] [PASSED] pf_negotiate_latest_newer
[17:10:00] [PASSED] pf_negotiate_latest_next
[17:10:00] [SKIPPED] pf_negotiate_latest_older
[17:10:00] [SKIPPED] pf_negotiate_latest_prev
[17:10:00] =================== [PASSED] pf_service ====================
[17:10:00] =================== xe_mocs (2 subtests) ===================
[17:10:00] ================ xe_live_mocs_kernel_kunit ================
[17:10:00] =========== [SKIPPED] xe_live_mocs_kernel_kunit ============
[17:10:00] ================ xe_live_mocs_reset_kunit =================
[17:10:00] ============ [SKIPPED] xe_live_mocs_reset_kunit ============
[17:10:00] ==================== [SKIPPED] xe_mocs =====================
[17:10:00] ================= xe_migrate (2 subtests) ==================
[17:10:00] ================= xe_migrate_sanity_kunit =================
[17:10:00] ============ [SKIPPED] xe_migrate_sanity_kunit =============
[17:10:00] ================== xe_validate_ccs_kunit ==================
[17:10:00] ============= [SKIPPED] xe_validate_ccs_kunit ==============
[17:10:00] =================== [SKIPPED] xe_migrate ===================
[17:10:00] ================== xe_dma_buf (1 subtest) ==================
[17:10:00] ==================== xe_dma_buf_kunit =====================
[17:10:00] ================ [SKIPPED] xe_dma_buf_kunit ================
[17:10:00] =================== [SKIPPED] xe_dma_buf ===================
[17:10:00] ================= xe_bo_shrink (1 subtest) =================
[17:10:00] =================== xe_bo_shrink_kunit ====================
[17:10:00] =============== [SKIPPED] xe_bo_shrink_kunit ===============
[17:10:00] ================== [SKIPPED] xe_bo_shrink ==================
[17:10:00] ==================== xe_bo (2 subtests) ====================
[17:10:00] ================== xe_ccs_migrate_kunit ===================
[17:10:00] ============== [SKIPPED] xe_ccs_migrate_kunit ==============
[17:10:00] ==================== xe_bo_evict_kunit ====================
[17:10:00] =============== [SKIPPED] xe_bo_evict_kunit ================
[17:10:00] ===================== [SKIPPED] xe_bo ======================
[17:10:00] ==================== args (11 subtests) ====================
[17:10:00] [PASSED] count_args_test
[17:10:00] [PASSED] call_args_example
[17:10:00] [PASSED] call_args_test
[17:10:00] [PASSED] drop_first_arg_example
[17:10:00] [PASSED] drop_first_arg_test
[17:10:00] [PASSED] first_arg_example
[17:10:00] [PASSED] first_arg_test
[17:10:00] [PASSED] last_arg_example
[17:10:00] [PASSED] last_arg_test
[17:10:00] [PASSED] pick_arg_example
[17:10:00] [PASSED] sep_comma_example
[17:10:00] ====================== [PASSED] args =======================
[17:10:00] =================== xe_pci (3 subtests) ====================
[17:10:00] ==================== check_graphics_ip ====================
[17:10:00] [PASSED] 12.70 Xe_LPG
[17:10:00] [PASSED] 12.71 Xe_LPG
[17:10:00] [PASSED] 12.74 Xe_LPG+
[17:10:00] [PASSED] 20.01 Xe2_HPG
[17:10:00] [PASSED] 20.02 Xe2_HPG
[17:10:00] [PASSED] 20.04 Xe2_LPG
[17:10:00] [PASSED] 30.00 Xe3_LPG
[17:10:00] [PASSED] 30.01 Xe3_LPG
[17:10:00] [PASSED] 30.03 Xe3_LPG
[17:10:00] ================ [PASSED] check_graphics_ip ================
[17:10:00] ===================== check_media_ip ======================
[17:10:00] [PASSED] 13.00 Xe_LPM+
[17:10:00] [PASSED] 13.01 Xe2_HPM
[17:10:00] [PASSED] 20.00 Xe2_LPM
[17:10:00] [PASSED] 30.00 Xe3_LPM
[17:10:00] [PASSED] 30.02 Xe3_LPM
[17:10:00] ================= [PASSED] check_media_ip ==================
[17:10:00] ================= check_platform_gt_count =================
[17:10:00] [PASSED] 0x9A60 (TIGERLAKE)
[17:10:00] [PASSED] 0x9A68 (TIGERLAKE)
[17:10:00] [PASSED] 0x9A70 (TIGERLAKE)
[17:10:00] [PASSED] 0x9A40 (TIGERLAKE)
[17:10:00] [PASSED] 0x9A49 (TIGERLAKE)
[17:10:00] [PASSED] 0x9A59 (TIGERLAKE)
[17:10:00] [PASSED] 0x9A78 (TIGERLAKE)
[17:10:00] [PASSED] 0x9AC0 (TIGERLAKE)
[17:10:00] [PASSED] 0x9AC9 (TIGERLAKE)
[17:10:00] [PASSED] 0x9AD9 (TIGERLAKE)
[17:10:00] [PASSED] 0x9AF8 (TIGERLAKE)
[17:10:00] [PASSED] 0x4C80 (ROCKETLAKE)
[17:10:00] [PASSED] 0x4C8A (ROCKETLAKE)
[17:10:00] [PASSED] 0x4C8B (ROCKETLAKE)
[17:10:00] [PASSED] 0x4C8C (ROCKETLAKE)
[17:10:00] [PASSED] 0x4C90 (ROCKETLAKE)
[17:10:00] [PASSED] 0x4C9A (ROCKETLAKE)
[17:10:00] [PASSED] 0x4680 (ALDERLAKE_S)
[17:10:00] [PASSED] 0x4682 (ALDERLAKE_S)
[17:10:00] [PASSED] 0x4688 (ALDERLAKE_S)
[17:10:00] [PASSED] 0x468A (ALDERLAKE_S)
[17:10:00] [PASSED] 0x468B (ALDERLAKE_S)
[17:10:00] [PASSED] 0x4690 (ALDERLAKE_S)
[17:10:00] [PASSED] 0x4692 (ALDERLAKE_S)
[17:10:00] [PASSED] 0x4693 (ALDERLAKE_S)
[17:10:00] [PASSED] 0x46A0 (ALDERLAKE_P)
[17:10:00] [PASSED] 0x46A1 (ALDERLAKE_P)
[17:10:00] [PASSED] 0x46A2 (ALDERLAKE_P)
[17:10:00] [PASSED] 0x46A3 (ALDERLAKE_P)
[17:10:00] [PASSED] 0x46A6 (ALDERLAKE_P)
[17:10:00] [PASSED] 0x46A8 (ALDERLAKE_P)
[17:10:00] [PASSED] 0x46AA (ALDERLAKE_P)
[17:10:00] [PASSED] 0x462A (ALDERLAKE_P)
[17:10:00] [PASSED] 0x4626 (ALDERLAKE_P)
[17:10:00] [PASSED] 0x4628 (ALDERLAKE_P)
[17:10:00] [PASSED] 0x46B0 (ALDERLAKE_P)
[17:10:00] [PASSED] 0x46B1 (ALDERLAKE_P)
[17:10:00] [PASSED] 0x46B2 (ALDERLAKE_P)
[17:10:00] [PASSED] 0x46B3 (ALDERLAKE_P)
[17:10:00] [PASSED] 0x46C0 (ALDERLAKE_P)
[17:10:00] [PASSED] 0x46C1 (ALDERLAKE_P)
[17:10:00] [PASSED] 0x46C2 (ALDERLAKE_P)
[17:10:00] [PASSED] 0x46C3 (ALDERLAKE_P)
[17:10:00] [PASSED] 0x46D0 (ALDERLAKE_N)
[17:10:00] [PASSED] 0x46D1 (ALDERLAKE_N)
[17:10:00] [PASSED] 0x46D2 (ALDERLAKE_N)
[17:10:00] [PASSED] 0x46D3 (ALDERLAKE_N)
[17:10:00] [PASSED] 0x46D4 (ALDERLAKE_N)
[17:10:00] [PASSED] 0xA721 (ALDERLAKE_P)
[17:10:00] [PASSED] 0xA7A1 (ALDERLAKE_P)
[17:10:00] [PASSED] 0xA7A9 (ALDERLAKE_P)
[17:10:00] [PASSED] 0xA7AC (ALDERLAKE_P)
[17:10:00] [PASSED] 0xA7AD (ALDERLAKE_P)
[17:10:00] [PASSED] 0xA720 (ALDERLAKE_P)
[17:10:00] [PASSED] 0xA7A0 (ALDERLAKE_P)
[17:10:00] [PASSED] 0xA7A8 (ALDERLAKE_P)
[17:10:00] [PASSED] 0xA7AA (ALDERLAKE_P)
[17:10:00] [PASSED] 0xA7AB (ALDERLAKE_P)
[17:10:00] [PASSED] 0xA780 (ALDERLAKE_S)
[17:10:00] [PASSED] 0xA781 (ALDERLAKE_S)
[17:10:00] [PASSED] 0xA782 (ALDERLAKE_S)
[17:10:00] [PASSED] 0xA783 (ALDERLAKE_S)
[17:10:00] [PASSED] 0xA788 (ALDERLAKE_S)
[17:10:00] [PASSED] 0xA789 (ALDERLAKE_S)
[17:10:00] [PASSED] 0xA78A (ALDERLAKE_S)
[17:10:00] [PASSED] 0xA78B (ALDERLAKE_S)
[17:10:00] [PASSED] 0x4905 (DG1)
[17:10:00] [PASSED] 0x4906 (DG1)
[17:10:00] [PASSED] 0x4907 (DG1)
[17:10:00] [PASSED] 0x4908 (DG1)
[17:10:00] [PASSED] 0x4909 (DG1)
[17:10:00] [PASSED] 0x56C0 (DG2)
[17:10:00] [PASSED] 0x56C2 (DG2)
[17:10:00] [PASSED] 0x56C1 (DG2)
[17:10:00] [PASSED] 0x7D51 (METEORLAKE)
[17:10:00] [PASSED] 0x7DD1 (METEORLAKE)
[17:10:00] [PASSED] 0x7D41 (METEORLAKE)
[17:10:00] [PASSED] 0x7D67 (METEORLAKE)
[17:10:00] [PASSED] 0xB640 (METEORLAKE)
[17:10:00] [PASSED] 0x56A0 (DG2)
[17:10:00] [PASSED] 0x56A1 (DG2)
[17:10:00] [PASSED] 0x56A2 (DG2)
[17:10:00] [PASSED] 0x56BE (DG2)
[17:10:00] [PASSED] 0x56BF (DG2)
[17:10:00] [PASSED] 0x5690 (DG2)
[17:10:00] [PASSED] 0x5691 (DG2)
[17:10:00] [PASSED] 0x5692 (DG2)
[17:10:00] [PASSED] 0x56A5 (DG2)
[17:10:00] [PASSED] 0x56A6 (DG2)
[17:10:00] [PASSED] 0x56B0 (DG2)
[17:10:00] [PASSED] 0x56B1 (DG2)
[17:10:00] [PASSED] 0x56BA (DG2)
[17:10:00] [PASSED] 0x56BB (DG2)
[17:10:00] [PASSED] 0x56BC (DG2)
[17:10:00] [PASSED] 0x56BD (DG2)
[17:10:00] [PASSED] 0x5693 (DG2)
[17:10:00] [PASSED] 0x5694 (DG2)
[17:10:00] [PASSED] 0x5695 (DG2)
[17:10:00] [PASSED] 0x56A3 (DG2)
[17:10:00] [PASSED] 0x56A4 (DG2)
[17:10:00] [PASSED] 0x56B2 (DG2)
[17:10:00] [PASSED] 0x56B3 (DG2)
[17:10:00] [PASSED] 0x5696 (DG2)
[17:10:00] [PASSED] 0x5697 (DG2)
[17:10:00] [PASSED] 0xB69 (PVC)
[17:10:00] [PASSED] 0xB6E (PVC)
[17:10:00] [PASSED] 0xBD4 (PVC)
[17:10:00] [PASSED] 0xBD5 (PVC)
[17:10:00] [PASSED] 0xBD6 (PVC)
[17:10:00] [PASSED] 0xBD7 (PVC)
[17:10:00] [PASSED] 0xBD8 (PVC)
[17:10:00] [PASSED] 0xBD9 (PVC)
[17:10:00] [PASSED] 0xBDA (PVC)
[17:10:00] [PASSED] 0xBDB (PVC)
[17:10:00] [PASSED] 0xBE0 (PVC)
[17:10:00] [PASSED] 0xBE1 (PVC)
[17:10:00] [PASSED] 0xBE5 (PVC)
[17:10:00] [PASSED] 0x7D40 (METEORLAKE)
[17:10:00] [PASSED] 0x7D45 (METEORLAKE)
[17:10:00] [PASSED] 0x7D55 (METEORLAKE)
[17:10:00] [PASSED] 0x7D60 (METEORLAKE)
[17:10:00] [PASSED] 0x7DD5 (METEORLAKE)
[17:10:00] [PASSED] 0x6420 (LUNARLAKE)
[17:10:00] [PASSED] 0x64A0 (LUNARLAKE)
[17:10:00] [PASSED] 0x64B0 (LUNARLAKE)
[17:10:00] [PASSED] 0xE202 (BATTLEMAGE)
[17:10:00] [PASSED] 0xE209 (BATTLEMAGE)
[17:10:00] [PASSED] 0xE20B (BATTLEMAGE)
[17:10:00] [PASSED] 0xE20C (BATTLEMAGE)
[17:10:00] [PASSED] 0xE20D (BATTLEMAGE)
[17:10:00] [PASSED] 0xE210 (BATTLEMAGE)
[17:10:00] [PASSED] 0xE211 (BATTLEMAGE)
[17:10:00] [PASSED] 0xE212 (BATTLEMAGE)
[17:10:00] [PASSED] 0xE216 (BATTLEMAGE)
[17:10:00] [PASSED] 0xE220 (BATTLEMAGE)
[17:10:00] [PASSED] 0xE221 (BATTLEMAGE)
[17:10:00] [PASSED] 0xE222 (BATTLEMAGE)
[17:10:00] [PASSED] 0xE223 (BATTLEMAGE)
[17:10:00] [PASSED] 0xB080 (PANTHERLAKE)
[17:10:00] [PASSED] 0xB081 (PANTHERLAKE)
[17:10:00] [PASSED] 0xB082 (PANTHERLAKE)
[17:10:00] [PASSED] 0xB083 (PANTHERLAKE)
[17:10:00] [PASSED] 0xB084 (PANTHERLAKE)
[17:10:00] [PASSED] 0xB085 (PANTHERLAKE)
[17:10:00] [PASSED] 0xB086 (PANTHERLAKE)
[17:10:00] [PASSED] 0xB087 (PANTHERLAKE)
[17:10:00] [PASSED] 0xB08F (PANTHERLAKE)
[17:10:00] [PASSED] 0xB090 (PANTHERLAKE)
[17:10:00] [PASSED] 0xB0A0 (PANTHERLAKE)
[17:10:00] [PASSED] 0xB0B0 (PANTHERLAKE)
[17:10:00] [PASSED] 0xFD80 (PANTHERLAKE)
[17:10:00] [PASSED] 0xFD81 (PANTHERLAKE)
[17:10:00] ============= [PASSED] check_platform_gt_count =============
[17:10:00] ===================== [PASSED] xe_pci ======================
[17:10:00] =================== xe_rtp (2 subtests) ====================
[17:10:00] =============== xe_rtp_process_to_sr_tests ================
[17:10:00] [PASSED] coalesce-same-reg
[17:10:00] [PASSED] no-match-no-add
[17:10:00] [PASSED] match-or
[17:10:00] [PASSED] match-or-xfail
[17:10:00] [PASSED] no-match-no-add-multiple-rules
[17:10:00] [PASSED] two-regs-two-entries
[17:10:00] [PASSED] clr-one-set-other
[17:10:00] [PASSED] set-field
[17:10:00] [PASSED] conflict-duplicate
[17:10:00] [PASSED] conflict-not-disjoint
[17:10:00] [PASSED] conflict-reg-type
[17:10:00] =========== [PASSED] xe_rtp_process_to_sr_tests ============
[17:10:00] ================== xe_rtp_process_tests ===================
[17:10:00] [PASSED] active1
[17:10:00] [PASSED] active2
[17:10:00] [PASSED] active-inactive
[17:10:00] [PASSED] inactive-active
[17:10:00] [PASSED] inactive-1st_or_active-inactive
[17:10:00] [PASSED] inactive-2nd_or_active-inactive
[17:10:00] [PASSED] inactive-last_or_active-inactive
[17:10:00] [PASSED] inactive-no_or_active-inactive
[17:10:00] ============== [PASSED] xe_rtp_process_tests ===============
[17:10:00] ===================== [PASSED] xe_rtp ======================
[17:10:00] ==================== xe_wa (1 subtest) =====================
[17:10:00] ======================== xe_wa_gt =========================
[17:10:00] [PASSED] TIGERLAKE (B0)
[17:10:00] [PASSED] DG1 (A0)
[17:10:00] [PASSED] DG1 (B0)
[17:10:00] [PASSED] ALDERLAKE_S (A0)
[17:10:00] [PASSED] ALDERLAKE_S (B0)
[17:10:00] [PASSED] ALDERLAKE_S (C0)
[17:10:00] [PASSED] ALDERLAKE_S (D0)
[17:10:00] [PASSED] ALDERLAKE_P (A0)
[17:10:00] [PASSED] ALDERLAKE_P (B0)
[17:10:00] [PASSED] ALDERLAKE_P (C0)
[17:10:00] [PASSED] ALDERLAKE_S_RPLS (D0)
[17:10:00] [PASSED] ALDERLAKE_P_RPLU (E0)
[17:10:00] [PASSED] DG2_G10 (C0)
[17:10:00] [PASSED] DG2_G11 (B1)
[17:10:00] [PASSED] DG2_G12 (A1)
[17:10:00] [PASSED] METEORLAKE (g:A0, m:A0)
[17:10:00] [PASSED] METEORLAKE (g:A0, m:A0)
[17:10:00] [PASSED] METEORLAKE (g:A0, m:A0)
[17:10:00] [PASSED] LUNARLAKE (g:A0, m:A0)
[17:10:00] [PASSED] LUNARLAKE (g:B0, m:A0)
stty: 'standard input': Inappropriate ioctl for device
[17:10:00] [PASSED] BATTLEMAGE (g:A0, m:A1)
[17:10:00] [PASSED] PANTHERLAKE (g:A0, m:A0)
[17:10:00] ==================== [PASSED] xe_wa_gt =====================
[17:10:00] ====================== [PASSED] xe_wa ======================
[17:10:00] ============================================================
[17:10:00] Testing complete. Ran 298 tests: passed: 282, skipped: 16
[17:10:00] Elapsed time: 33.299s total, 4.277s configuring, 28.656s building, 0.330s running
+ /kernel/tools/testing/kunit/kunit.py run --kunitconfig /kernel/drivers/gpu/drm/tests/.kunitconfig
stty: 'standard input': Inappropriate ioctl for device
[17:10:00] Configuring KUnit Kernel ...
Regenerating .config ...
Populating config with:
$ make ARCH=um O=.kunit olddefconfig
[17:10:02] Building KUnit Kernel ...
Populating config with:
$ make ARCH=um O=.kunit olddefconfig
Building with:
$ make all compile_commands.json scripts_gdb ARCH=um O=.kunit --jobs=48
[17:10:25] Starting KUnit Kernel (1/1)...
[17:10:25] ============================================================
Running tests with:
$ .kunit/linux kunit.enable=1 mem=1G console=tty kunit_shutdown=halt
[17:10:25] == drm_test_atomic_get_connector_for_encoder (1 subtest) ===
[17:10:25] [PASSED] drm_test_drm_atomic_get_connector_for_encoder
[17:10:25] ==== [PASSED] drm_test_atomic_get_connector_for_encoder ====
[17:10:25] =========== drm_validate_clone_mode (2 subtests) ===========
[17:10:25] ============== drm_test_check_in_clone_mode ===============
[17:10:25] [PASSED] in_clone_mode
[17:10:25] [PASSED] not_in_clone_mode
[17:10:25] ========== [PASSED] drm_test_check_in_clone_mode ===========
[17:10:25] =============== drm_test_check_valid_clones ===============
[17:10:25] [PASSED] not_in_clone_mode
[17:10:25] [PASSED] valid_clone
[17:10:25] [PASSED] invalid_clone
[17:10:25] =========== [PASSED] drm_test_check_valid_clones ===========
[17:10:25] ============= [PASSED] drm_validate_clone_mode =============
[17:10:25] ============= drm_validate_modeset (1 subtest) =============
[17:10:25] [PASSED] drm_test_check_connector_changed_modeset
[17:10:25] ============== [PASSED] drm_validate_modeset ===============
[17:10:25] ====== drm_test_bridge_get_current_state (2 subtests) ======
[17:10:25] [PASSED] drm_test_drm_bridge_get_current_state_atomic
[17:10:25] [PASSED] drm_test_drm_bridge_get_current_state_legacy
[17:10:25] ======== [PASSED] drm_test_bridge_get_current_state ========
[17:10:25] ====== drm_test_bridge_helper_reset_crtc (3 subtests) ======
[17:10:25] [PASSED] drm_test_drm_bridge_helper_reset_crtc_atomic
[17:10:25] [PASSED] drm_test_drm_bridge_helper_reset_crtc_atomic_disabled
[17:10:25] [PASSED] drm_test_drm_bridge_helper_reset_crtc_legacy
[17:10:25] ======== [PASSED] drm_test_bridge_helper_reset_crtc ========
[17:10:25] ============== drm_bridge_alloc (2 subtests) ===============
[17:10:25] [PASSED] drm_test_drm_bridge_alloc_basic
[17:10:25] [PASSED] drm_test_drm_bridge_alloc_get_put
[17:10:25] ================ [PASSED] drm_bridge_alloc =================
[17:10:25] ================== drm_buddy (7 subtests) ==================
[17:10:25] [PASSED] drm_test_buddy_alloc_limit
[17:10:25] [PASSED] drm_test_buddy_alloc_optimistic
[17:10:25] [PASSED] drm_test_buddy_alloc_pessimistic
[17:10:25] [PASSED] drm_test_buddy_alloc_pathological
[17:10:25] [PASSED] drm_test_buddy_alloc_contiguous
[17:10:25] [ERROR] Test: drm_buddy: missing expected subtest!
[17:10:25]
[17:10:25] Pid: 55, comm: kunit_try_catch Tainted: G W N 6.17.0-rc2-gf439559ab901
[17:10:25] RIP: 0033:rb_erase+0x1a8/0x3b0
[17:10:25] RSP: 00000000bf883d98 EFLAGS: 00010246
[17:10:25] RAX: 000000007ff39c59 RBX: 0000000000000000 RCX: 000000007ff39c58
[17:10:25] RDX: 0000000000000000 RSI: 000000007ff39c58 RDI: 000000007ff39c08
[17:10:25] RBP: 00000000bf883ea0 R08: 000000007feeaf88 R09: 000000007ff39501
[17:10:25] R10: 000000007ff39901 R11: 0000000000000fff R12: 000000007ff39a50
[17:10:25] R13: 0000000060674278 R14: 000000007ff39c30 R15: 000000007ff39be0
[17:10:25] Kernel panic - not syncing: Segfault with no mm
[17:10:25] [CRASHED]
[17:10:25] [ERROR] Test: drm_buddy: missing expected subtest!
[17:10:25] [CRASHED]
[17:10:25] [ERROR] Test: drm_buddy: missing subtest result line!
[17:10:25] # drm_buddy: Testing DRM buddy manager, with random_seed=0x205330ae
[17:10:25] # module: drm_buddy_test
[17:10:25] =================== [CRASHED] drm_buddy ====================
[17:10:25] [ERROR] Test: main: missing expected subtest!
[17:10:25] [CRASHED]
[17:10:25] [ERROR] Test: main: missing expected subtest!
[17:10:25] [CRASHED]
[17:10:25] [ERROR] Test: main: missing expected subtest!
[17:10:25] [CRASHED]
[17:10:25] [ERROR] Test: main: missing expected subtest!
[17:10:25] [CRASHED]
[17:10:25] [ERROR] Test: main: missing expected subtest!
[17:10:25] [CRASHED]
[17:10:25] [ERROR] Test: main: missing expected subtest!
[17:10:25] [CRASHED]
[17:10:25] [ERROR] Test: main: missing expected subtest!
[17:10:25] [CRASHED]
[17:10:25] [ERROR] Test: main: missing expected subtest!
[17:10:25] [CRASHED]
[17:10:25] [ERROR] Test: main: missing expected subtest!
[17:10:25] [CRASHED]
[17:10:25] [ERROR] Test: main: missing expected subtest!
[17:10:25] [CRASHED]
[17:10:25] [ERROR] Test: main: missing expected subtest!
[17:10:25] [CRASHED]
[17:10:25] [ERROR] Test: main: missing expected subtest!
[17:10:25] [CRASHED]
[17:10:25] [ERROR] Test: main: missing expected subtest!
[17:10:25] [CRASHED]
[17:10:25] [ERROR] Test: main: missing expected subtest!
[17:10:25] [CRASHED]
[17:10:25] [ERROR] Test: main: missing expected subtest!
[17:10:25] [CRASHED]
[17:10:25] [ERROR] Test: main: missing expected subtest!
[17:10:25] [CRASHED]
[17:10:25] [ERROR] Test: main: missing expected subtest!
[17:10:25] [CRASHED]
[17:10:25] [ERROR] Test: main: missing expected subtest!
[17:10:25] [CRASHED]
[17:10:25] [ERROR] Test: main: missing expected subtest!
[17:10:25] [CRASHED]
[17:10:25] [ERROR] Test: main: missing expected subtest!
[17:10:25] [CRASHED]
[17:10:25] [ERROR] Test: main: missing expected subtest!
[17:10:25] [CRASHED]
[17:10:25] [ERROR] Test: main: missing expected subtest!
[17:10:25] [CRASHED]
[17:10:25] [ERROR] Test: main: missing expected subtest!
[17:10:25] [CRASHED]
[17:10:25] [ERROR] Test: main: missing expected subtest!
[17:10:25] [CRASHED]
[17:10:25] [ERROR] Test: main: missing expected subtest!
[17:10:25] [CRASHED]
[17:10:25] [ERROR] Test: main: missing expected subtest!
[17:10:25] [CRASHED]
[17:10:25] [ERROR] Test: main: missing expected subtest!
[17:10:25] [CRASHED]
[17:10:25] [ERROR] Test: main: missing expected subtest!
[17:10:25] [CRASHED]
[17:10:25] ============================================================
[17:10:25] Testing complete. Ran 49 tests: passed: 19, crashed: 30, errors: 31
The kernel seems to have crashed; you can decode the stack traces with:
$ scripts/decode_stacktrace.sh .kunit/vmlinux .kunit < .kunit/test.log | tee .kunit/decoded.log | /kernel/tools/testing/kunit/kunit.py parse
[17:10:25] Elapsed time: 24.899s total, 1.665s configuring, 22.966s building, 0.268s running
+ cleanup
++ stat -c %u:%g /kernel
+ chown -R 1003:1003 /kernel
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v3 1/2] drm/buddy: Optimize free block management with RB tree
2025-08-21 15:06 [PATCH v3 1/2] drm/buddy: Optimize free block management with RB tree Arunpravin Paneer Selvam
` (2 preceding siblings ...)
2025-08-21 17:10 ` ✗ CI.KUnit: failure " Patchwork
@ 2025-08-22 8:37 ` Matthew Auld
2025-08-22 12:28 ` Matthew Auld
3 siblings, 1 reply; 10+ messages in thread
From: Matthew Auld @ 2025-08-22 8:37 UTC (permalink / raw)
To: Arunpravin Paneer Selvam, christian.koenig, dri-devel, amd-gfx,
intel-gfx, intel-xe
Cc: alexander.deucher
On 21/08/2025 16:06, Arunpravin Paneer Selvam wrote:
> Replace the freelist (O(n)) used for free block management with a
> red-black tree, providing more efficient O(log n) search, insert,
> and delete operations. This improves scalability and performance
> when managing large numbers of free blocks per order (e.g., hundreds
> or thousands).
>
> In the VK-CTS memory stress subtest, the buddy manager merges
> fragmented memory and inserts freed blocks into the freelist. Since
> freelist insertion is O(n), this becomes a bottleneck as fragmentation
> increases. Benchmarking shows list_insert_sorted() consumes ~52.69% CPU
> with the freelist, compared to just 0.03% with the RB tree
> (rbtree_insert.isra.0), despite performing the same sorted insert.
>
> This also improves performance in heavily fragmented workloads,
> such as games or graphics tests that stress memory.
>
> v3(Matthew):
> - Remove RB_EMPTY_NODE check in force_merge function.
> - Rename rb for loop macros to have less generic names and move to
> .c file.
> - Make the rb node rb and link field as union.
>
> Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
CI is reporting a crash in rb_erase when running the drm_buddy kunit,
somewhere in drm_test_buddy_alloc_clear it seems.
> ---
> drivers/gpu/drm/drm_buddy.c | 175 +++++++++++++++++++++++++-----------
> include/drm/drm_buddy.h | 9 +-
> 2 files changed, 130 insertions(+), 54 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
> index a94061f373de..92226a46cc2c 100644
> --- a/drivers/gpu/drm/drm_buddy.c
> +++ b/drivers/gpu/drm/drm_buddy.c
> @@ -14,6 +14,41 @@
>
> static struct kmem_cache *slab_blocks;
>
> +/*
> + * for_each_rb_free_block() - iterate over an RB tree in order
> + * @pos: the struct type * to use as a loop cursor
> + * @root: pointer to struct rb_root to iterate
> + * @member: name of the rb_node field within the struct
> + */
> +#define for_each_rb_free_block(pos, root, member) \
> + for (pos = rb_entry_safe(rb_first(root), typeof(*pos), member); \
> + pos; \
> + pos = rb_entry_safe(rb_next(&(pos)->member), typeof(*pos), member))
> +
> +/*
> + * for_each_rb_free_block_reverse() - iterate over an RB tree in reverse order
> + * @pos: the struct type * to use as a loop cursor
> + * @root: pointer to struct rb_root to iterate
> + * @member: name of the rb_node field within the struct
> + */
> +#define for_each_rb_free_block_reverse(pos, root, member) \
> + for (pos = rb_entry_safe(rb_last(root), typeof(*pos), member); \
> + pos; \
> + pos = rb_entry_safe(rb_prev(&(pos)->member), typeof(*pos), member))
> +
> +/**
> + * for_each_rb_free_block_reverse_safe() - safely iterate over an RB tree in reverse order
> + * @pos: the struct type * to use as a loop cursor.
> + * @n: another struct type * to use as temporary storage.
> + * @root: pointer to struct rb_root to iterate.
> + * @member: name of the rb_node field within the struct.
> + */
> +#define for_each_rb_free_block_reverse_safe(pos, n, root, member) \
> + for (pos = rb_entry_safe(rb_last(root), typeof(*pos), member), \
> + n = pos ? rb_entry_safe(rb_prev(&(pos)->member), typeof(*pos), member) : NULL; \
> + pos; \
> + pos = n, n = pos ? rb_entry_safe(rb_prev(&(pos)->member), typeof(*pos), member) : NULL)
> +
> static struct drm_buddy_block *drm_block_alloc(struct drm_buddy *mm,
> struct drm_buddy_block *parent,
> unsigned int order,
> @@ -31,6 +66,8 @@ static struct drm_buddy_block *drm_block_alloc(struct drm_buddy *mm,
> block->header |= order;
> block->parent = parent;
>
> + RB_CLEAR_NODE(&block->rb);
> +
> BUG_ON(block->header & DRM_BUDDY_HEADER_UNUSED);
> return block;
> }
> @@ -41,23 +78,53 @@ static void drm_block_free(struct drm_buddy *mm,
> kmem_cache_free(slab_blocks, block);
> }
>
> -static void list_insert_sorted(struct drm_buddy *mm,
> - struct drm_buddy_block *block)
> +static void rbtree_insert(struct drm_buddy *mm,
> + struct drm_buddy_block *block)
> {
> + struct rb_root *root = &mm->free_tree[drm_buddy_block_order(block)];
> + struct rb_node **link = &root->rb_node;
> + struct rb_node *parent = NULL;
> struct drm_buddy_block *node;
> - struct list_head *head;
> + u64 offset;
> +
> + offset = drm_buddy_block_offset(block);
>
> - head = &mm->free_list[drm_buddy_block_order(block)];
> - if (list_empty(head)) {
> - list_add(&block->link, head);
> - return;
> + while (*link) {
> + parent = *link;
> + node = rb_entry(parent, struct drm_buddy_block, rb);
> +
> + if (offset < drm_buddy_block_offset(node))
> + link = &parent->rb_left;
> + else
> + link = &parent->rb_right;
> }
>
> - list_for_each_entry(node, head, link)
> - if (drm_buddy_block_offset(block) < drm_buddy_block_offset(node))
> - break;
> + rb_link_node(&block->rb, parent, link);
> + rb_insert_color(&block->rb, root);
> +}
>
> - __list_add(&block->link, node->link.prev, &node->link);
> +static void rbtree_remove(struct drm_buddy *mm,
> + struct drm_buddy_block *block)
> +{
> + struct rb_root *root;
> +
> + root = &mm->free_tree[drm_buddy_block_order(block)];
> + rb_erase(&block->rb, root);
> +
> + RB_CLEAR_NODE(&block->rb);
> +}
> +
> +static inline struct drm_buddy_block *
> +rbtree_last_entry(struct drm_buddy *mm, unsigned int order)
> +{
> + struct rb_node *node = rb_last(&mm->free_tree[order]);
> +
> + return node ? rb_entry(node, struct drm_buddy_block, rb) : NULL;
> +}
> +
> +static bool rbtree_is_empty(struct drm_buddy *mm, unsigned int order)
> +{
> + return RB_EMPTY_ROOT(&mm->free_tree[order]);
> }
>
> static void clear_reset(struct drm_buddy_block *block)
> @@ -70,12 +137,13 @@ static void mark_cleared(struct drm_buddy_block *block)
> block->header |= DRM_BUDDY_HEADER_CLEAR;
> }
>
> -static void mark_allocated(struct drm_buddy_block *block)
> +static void mark_allocated(struct drm_buddy *mm,
> + struct drm_buddy_block *block)
> {
> block->header &= ~DRM_BUDDY_HEADER_STATE;
> block->header |= DRM_BUDDY_ALLOCATED;
>
> - list_del(&block->link);
> + rbtree_remove(mm, block);
> }
>
> static void mark_free(struct drm_buddy *mm,
> @@ -84,15 +152,16 @@ static void mark_free(struct drm_buddy *mm,
> block->header &= ~DRM_BUDDY_HEADER_STATE;
> block->header |= DRM_BUDDY_FREE;
>
> - list_insert_sorted(mm, block);
> + rbtree_insert(mm, block);
> }
>
> -static void mark_split(struct drm_buddy_block *block)
> +static void mark_split(struct drm_buddy *mm,
> + struct drm_buddy_block *block)
> {
> block->header &= ~DRM_BUDDY_HEADER_STATE;
> block->header |= DRM_BUDDY_SPLIT;
>
> - list_del(&block->link);
> + rbtree_remove(mm, block);
> }
>
> static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2)
> @@ -148,7 +217,7 @@ static unsigned int __drm_buddy_free(struct drm_buddy *mm,
> mark_cleared(parent);
> }
>
> - list_del(&buddy->link);
> + rbtree_remove(mm, buddy);
> if (force_merge && drm_buddy_block_is_clear(buddy))
> mm->clear_avail -= drm_buddy_block_size(mm, buddy);
>
> @@ -179,9 +248,11 @@ static int __force_merge(struct drm_buddy *mm,
> return -EINVAL;
>
> for (i = min_order - 1; i >= 0; i--) {
> - struct drm_buddy_block *block, *prev;
> + struct drm_buddy_block *block, *prev_block, *first_block;
>
> - list_for_each_entry_safe_reverse(block, prev, &mm->free_list[i], link) {
> + first_block = rb_entry(rb_first(&mm->free_tree[i]), struct drm_buddy_block, rb);
> +
> + for_each_rb_free_block_reverse_safe(block, prev_block, &mm->free_tree[i], rb) {
> struct drm_buddy_block *buddy;
> u64 block_start, block_end;
>
> @@ -206,10 +277,14 @@ static int __force_merge(struct drm_buddy *mm,
> * block in the next iteration as we would free the
> * buddy block as part of the free function.
> */
> - if (prev == buddy)
> - prev = list_prev_entry(prev, link);
> + if (prev_block && prev_block == buddy) {
> + if (prev_block != first_block)
> + prev_block = rb_entry(rb_prev(&prev_block->rb),
> + struct drm_buddy_block,
> + rb);
> + }
>
> - list_del(&block->link);
> + rbtree_remove(mm, block);
> if (drm_buddy_block_is_clear(block))
> mm->clear_avail -= drm_buddy_block_size(mm, block);
>
> @@ -258,14 +333,14 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
>
> BUG_ON(mm->max_order > DRM_BUDDY_MAX_ORDER);
>
> - mm->free_list = kmalloc_array(mm->max_order + 1,
> - sizeof(struct list_head),
> + mm->free_tree = kmalloc_array(mm->max_order + 1,
> + sizeof(struct rb_root),
> GFP_KERNEL);
> - if (!mm->free_list)
> + if (!mm->free_tree)
> return -ENOMEM;
>
> for (i = 0; i <= mm->max_order; ++i)
> - INIT_LIST_HEAD(&mm->free_list[i]);
> + mm->free_tree[i] = RB_ROOT;
>
> mm->n_roots = hweight64(size);
>
> @@ -273,7 +348,7 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
> sizeof(struct drm_buddy_block *),
> GFP_KERNEL);
> if (!mm->roots)
> - goto out_free_list;
> + goto out_free_tree;
>
> offset = 0;
> i = 0;
> @@ -312,8 +387,8 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
> while (i--)
> drm_block_free(mm, mm->roots[i]);
> kfree(mm->roots);
> -out_free_list:
> - kfree(mm->free_list);
> +out_free_tree:
> + kfree(mm->free_tree);
> return -ENOMEM;
> }
> EXPORT_SYMBOL(drm_buddy_init);
> @@ -323,7 +398,7 @@ EXPORT_SYMBOL(drm_buddy_init);
> *
> * @mm: DRM buddy manager to free
> *
> - * Cleanup memory manager resources and the freelist
> + * Cleanup memory manager resources and the freetree
> */
> void drm_buddy_fini(struct drm_buddy *mm)
> {
> @@ -350,7 +425,7 @@ void drm_buddy_fini(struct drm_buddy *mm)
> WARN_ON(mm->avail != mm->size);
>
> kfree(mm->roots);
> - kfree(mm->free_list);
> + kfree(mm->free_tree);
> }
> EXPORT_SYMBOL(drm_buddy_fini);
>
> @@ -383,7 +458,7 @@ static int split_block(struct drm_buddy *mm,
> clear_reset(block);
> }
>
> - mark_split(block);
> + mark_split(mm, block);
>
> return 0;
> }
> @@ -433,7 +508,7 @@ void drm_buddy_reset_clear(struct drm_buddy *mm, bool is_clear)
> for (i = 0; i <= mm->max_order; ++i) {
> struct drm_buddy_block *block;
>
> - list_for_each_entry_reverse(block, &mm->free_list[i], link) {
> + for_each_rb_free_block_reverse(block, &mm->free_tree[i], rb) {
> if (is_clear != drm_buddy_block_is_clear(block)) {
> if (is_clear) {
> mark_cleared(block);
> @@ -641,7 +716,7 @@ get_maxblock(struct drm_buddy *mm, unsigned int order,
> for (i = order; i <= mm->max_order; ++i) {
> struct drm_buddy_block *tmp_block;
>
> - list_for_each_entry_reverse(tmp_block, &mm->free_list[i], link) {
> + for_each_rb_free_block_reverse(tmp_block, &mm->free_tree[i], rb) {
> if (block_incompatible(tmp_block, flags))
> continue;
>
> @@ -667,7 +742,7 @@ get_maxblock(struct drm_buddy *mm, unsigned int order,
> }
>
> static struct drm_buddy_block *
> -alloc_from_freelist(struct drm_buddy *mm,
> +alloc_from_freetree(struct drm_buddy *mm,
> unsigned int order,
> unsigned long flags)
> {
> @@ -684,7 +759,7 @@ alloc_from_freelist(struct drm_buddy *mm,
> for (tmp = order; tmp <= mm->max_order; ++tmp) {
> struct drm_buddy_block *tmp_block;
>
> - list_for_each_entry_reverse(tmp_block, &mm->free_list[tmp], link) {
> + for_each_rb_free_block_reverse(tmp_block, &mm->free_tree[tmp], rb) {
> if (block_incompatible(tmp_block, flags))
> continue;
>
> @@ -700,10 +775,8 @@ alloc_from_freelist(struct drm_buddy *mm,
> if (!block) {
> /* Fallback method */
> for (tmp = order; tmp <= mm->max_order; ++tmp) {
> - if (!list_empty(&mm->free_list[tmp])) {
> - block = list_last_entry(&mm->free_list[tmp],
> - struct drm_buddy_block,
> - link);
> + if (!rbtree_is_empty(mm, tmp)) {
> + block = rbtree_last_entry(mm, tmp);
> if (block)
> break;
> }
> @@ -771,7 +844,7 @@ static int __alloc_range(struct drm_buddy *mm,
>
> if (contains(start, end, block_start, block_end)) {
> if (drm_buddy_block_is_free(block)) {
> - mark_allocated(block);
> + mark_allocated(mm, block);
> total_allocated += drm_buddy_block_size(mm, block);
> mm->avail -= drm_buddy_block_size(mm, block);
> if (drm_buddy_block_is_clear(block))
> @@ -849,7 +922,6 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm,
> {
> u64 rhs_offset, lhs_offset, lhs_size, filled;
> struct drm_buddy_block *block;
> - struct list_head *list;
> LIST_HEAD(blocks_lhs);
> unsigned long pages;
> unsigned int order;
> @@ -862,11 +934,10 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm,
> if (order == 0)
> return -ENOSPC;
>
> - list = &mm->free_list[order];
> - if (list_empty(list))
> + if (rbtree_is_empty(mm, order))
> return -ENOSPC;
>
> - list_for_each_entry_reverse(block, list, link) {
> + for_each_rb_free_block_reverse(block, &mm->free_tree[order], rb) {
> /* Allocate blocks traversing RHS */
> rhs_offset = drm_buddy_block_offset(block);
> err = __drm_buddy_alloc_range(mm, rhs_offset, size,
> @@ -976,7 +1047,7 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
> list_add(&block->tmp_link, &dfs);
> err = __alloc_range(mm, &dfs, new_start, new_size, blocks, NULL);
> if (err) {
> - mark_allocated(block);
> + mark_allocated(mm, block);
> mm->avail -= drm_buddy_block_size(mm, block);
> if (drm_buddy_block_is_clear(block))
> mm->clear_avail -= drm_buddy_block_size(mm, block);
> @@ -999,8 +1070,8 @@ __drm_buddy_alloc_blocks(struct drm_buddy *mm,
> return __drm_buddy_alloc_range_bias(mm, start, end,
> order, flags);
> else
> - /* Allocate from freelist */
> - return alloc_from_freelist(mm, order, flags);
> + /* Allocate from freetree */
> + return alloc_from_freetree(mm, order, flags);
> }
>
> /**
> @@ -1017,8 +1088,8 @@ __drm_buddy_alloc_blocks(struct drm_buddy *mm,
> * alloc_range_bias() called on range limitations, which traverses
> * the tree and returns the desired block.
> *
> - * alloc_from_freelist() called when *no* range restrictions
> - * are enforced, which picks the block from the freelist.
> + * alloc_from_freetree() called when *no* range restrictions
> + * are enforced, which picks the block from the freetree.
> *
> * Returns:
> * 0 on success, error code on failure.
> @@ -1120,7 +1191,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
> }
> } while (1);
>
> - mark_allocated(block);
> + mark_allocated(mm, block);
> mm->avail -= drm_buddy_block_size(mm, block);
> if (drm_buddy_block_is_clear(block))
> mm->clear_avail -= drm_buddy_block_size(mm, block);
> @@ -1204,7 +1275,7 @@ void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p)
> struct drm_buddy_block *block;
> u64 count = 0, free;
>
> - list_for_each_entry(block, &mm->free_list[order], link) {
> + for_each_rb_free_block(block, &mm->free_tree[order], rb) {
> BUG_ON(!drm_buddy_block_is_free(block));
> count++;
> }
> diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
> index 513837632b7d..091823592034 100644
> --- a/include/drm/drm_buddy.h
> +++ b/include/drm/drm_buddy.h
> @@ -10,6 +10,7 @@
> #include <linux/list.h>
> #include <linux/slab.h>
> #include <linux/sched.h>
> +#include <linux/rbtree.h>
>
> #include <drm/drm_print.h>
>
> @@ -53,7 +54,11 @@ struct drm_buddy_block {
> * a list, if so desired. As soon as the block is freed with
> * drm_buddy_free* ownership is given back to the mm.
> */
> - struct list_head link;
> + union {
> + struct rb_node rb;
> + struct list_head link;
> + };
> +
> struct list_head tmp_link;
> };
>
> @@ -68,7 +73,7 @@ struct drm_buddy_block {
> */
> struct drm_buddy {
> /* Maintain a free list for each order. */
> - struct list_head *free_list;
> + struct rb_root *free_tree;
>
> /*
> * Maintain explicit binary tree(s) to track the allocation of the
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v3 2/2] drm/buddy: Separate clear and dirty free block trees
2025-08-21 15:06 ` [PATCH v3 2/2] drm/buddy: Separate clear and dirty free block trees Arunpravin Paneer Selvam
@ 2025-08-22 12:02 ` Matthew Auld
2025-08-25 11:31 ` Arunpravin Paneer Selvam
0 siblings, 1 reply; 10+ messages in thread
From: Matthew Auld @ 2025-08-22 12:02 UTC (permalink / raw)
To: Arunpravin Paneer Selvam, christian.koenig, dri-devel, amd-gfx,
intel-gfx, intel-xe
Cc: alexander.deucher
On 21/08/2025 16:06, Arunpravin Paneer Selvam wrote:
> Maintain two separate RB trees per order - one for clear (zeroed) blocks
> and another for dirty (uncleared) blocks. This separation improves
> code clarity and makes it more obvious which tree is being searched
> during allocation. It also improves scalability and efficiency when
> searching for a specific type of block, avoiding unnecessary checks
> and making the allocator more predictable under fragmentation.
>
> The changes have been validated using the existing drm_buddy_test
> KUnit test cases, along with selected graphics workloads,
> to ensure correctness and avoid regressions.
>
> v2: Missed adding the suggested-by tag. Added it in v2.
> v3(Matthew):
> - Remove the double underscores from the internal functions.
> - Rename the internal functions to have less generic names.
> - Fix the error handling code.
> - Pass tree argument for the tree macro.
> - Use the existing dirty/free bit instead of new tree field.
> - Make free_trees[] instead of clear_tree and dirty_tree for
> more cleaner approach.
>
> Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
> Suggested-by: Matthew Auld <matthew.auld@intel.com>
> Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4260
> ---
> drivers/gpu/drm/drm_buddy.c | 342 ++++++++++++++++++++++--------------
> include/drm/drm_buddy.h | 8 +-
> 2 files changed, 215 insertions(+), 135 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
> index 92226a46cc2c..f57c384889a9 100644
> --- a/drivers/gpu/drm/drm_buddy.c
> +++ b/drivers/gpu/drm/drm_buddy.c
> @@ -14,6 +14,9 @@
>
> static struct kmem_cache *slab_blocks;
>
> +#define for_each_free_tree(tree) \
> + for ((tree) = CLEAR_TREE; (tree) < MAX_FREE_TREES; (tree)++)
> +
> /*
> * for_each_rb_free_block() - iterate over an RB tree in order
> * @pos: the struct type * to use as a loop cursor
> @@ -78,22 +81,77 @@ static void drm_block_free(struct drm_buddy *mm,
> kmem_cache_free(slab_blocks, block);
> }
>
> +static inline struct rb_root *
> +get_root(struct drm_buddy *mm,
> + unsigned int order,
> + enum free_tree tree)
> +{
> + if (tree == CLEAR_TREE)
> + return &mm->free_trees[CLEAR_TREE][order];
> + else
> + return &mm->free_trees[DIRTY_TREE][order];
I think we can replace this with something like:
return &mm->free_trees[tree][order];
> +}
> +
> +static inline enum free_tree
> +get_tree_for_block(struct drm_buddy_block *block)
> +{
> + return drm_buddy_block_is_clear(block) ? CLEAR_TREE : DIRTY_TREE;
> +}
> +
> +static inline enum free_tree
> +get_tree_for_flags(unsigned long flags)
> +{
> + return (flags & DRM_BUDDY_CLEAR_ALLOCATION) ? CLEAR_TREE : DIRTY_TREE;
> +}
> +
> +static inline struct drm_buddy_block *
> +rbtree_get_free_block(struct rb_node *node)
> +{
> + return node ? rb_entry(node, struct drm_buddy_block, rb) : NULL;
> +}
> +
> +static inline struct drm_buddy_block *
> +rbtree_prev_free_block(struct rb_node *node)
> +{
> + return rbtree_get_free_block(rb_prev(node));
> +}
> +
> +static inline struct drm_buddy_block *
> +rbtree_first_free_block(struct rb_root *root)
> +{
> + return rbtree_get_free_block(rb_first(root));
> +}
> +
> +static inline struct drm_buddy_block *
> +rbtree_last_free_block(struct rb_root *root)
> +{
> + return rbtree_get_free_block(rb_last(root));
> +}
> +
> +static inline bool rbtree_is_empty(struct rb_root *root)
> +{
> + return RB_EMPTY_ROOT(root);
> +}
> +
> static void rbtree_insert(struct drm_buddy *mm,
> - struct drm_buddy_block *block)
> + struct drm_buddy_block *block,
> + enum free_tree tree)
> {
> - struct rb_root *root = &mm->free_tree[drm_buddy_block_order(block)];
> - struct rb_node **link = &root->rb_node;
> - struct rb_node *parent = NULL;
> + struct rb_node **link, *parent = NULL;
> struct drm_buddy_block *node;
> - u64 offset;
> + struct rb_root *root;
> + unsigned int order;
> +
> + order = drm_buddy_block_order(block);
>
> - offset = drm_buddy_block_offset(block);
> + root = get_root(mm, order, tree);
> + link = &root->rb_node;
>
> while (*link) {
> parent = *link;
> - node = rb_entry(parent, struct drm_buddy_block, rb);
> + node = rbtree_get_free_block(parent);
>
> - if (offset < drm_buddy_block_offset(node))
> + if (drm_buddy_block_offset(block) < drm_buddy_block_offset(node))
> link = &parent->rb_left;
> else
> link = &parent->rb_right;
> @@ -106,27 +164,19 @@ static void rbtree_insert(struct drm_buddy *mm,
> static void rbtree_remove(struct drm_buddy *mm,
> struct drm_buddy_block *block)
> {
> + unsigned int order = drm_buddy_block_order(block);
> + enum free_tree tree;
> struct rb_root *root;
>
> - root = &mm->free_tree[drm_buddy_block_order(block)];
> + tree = drm_buddy_block_is_clear(block) ?
> + CLEAR_TREE : DIRTY_TREE;
> +
> + root = get_root(mm, order, tree);
> rb_erase(&block->rb, root);
>
> RB_CLEAR_NODE(&block->rb);
> }
>
> -static inline struct drm_buddy_block *
> -rbtree_last_entry(struct drm_buddy *mm, unsigned int order)
> -{
> - struct rb_node *node = rb_last(&mm->free_tree[order]);
> -
> - return node ? rb_entry(node, struct drm_buddy_block, rb) : NULL;
> -}
> -
> -static bool rbtree_is_empty(struct drm_buddy *mm, unsigned int order)
> -{
> - return RB_EMPTY_ROOT(&mm->free_tree[order]);
> -}
> -
> static void clear_reset(struct drm_buddy_block *block)
> {
> block->header &= ~DRM_BUDDY_HEADER_CLEAR;
> @@ -149,10 +199,14 @@ static void mark_allocated(struct drm_buddy *mm,
> static void mark_free(struct drm_buddy *mm,
> struct drm_buddy_block *block)
> {
> + enum free_tree tree;
> +
> block->header &= ~DRM_BUDDY_HEADER_STATE;
> block->header |= DRM_BUDDY_FREE;
>
> - rbtree_insert(mm, block);
> + tree = get_tree_for_block(block);
> +
> + rbtree_insert(mm, block, tree);
> }
>
> static void mark_split(struct drm_buddy *mm,
> @@ -238,6 +292,7 @@ static int __force_merge(struct drm_buddy *mm,
> u64 end,
> unsigned int min_order)
> {
> + enum free_tree tree;
> unsigned int order;
> int i;
>
> @@ -247,50 +302,49 @@ static int __force_merge(struct drm_buddy *mm,
> if (min_order > mm->max_order)
> return -EINVAL;
>
> - for (i = min_order - 1; i >= 0; i--) {
> - struct drm_buddy_block *block, *prev_block, *first_block;
> -
> - first_block = rb_entry(rb_first(&mm->free_tree[i]), struct drm_buddy_block, rb);
> + for_each_free_tree(tree) {
> + for (i = min_order - 1; i >= 0; i--) {
> + struct rb_root *root = get_root(mm, i, tree);
> + struct drm_buddy_block *block, *prev_block;
>
> - for_each_rb_free_block_reverse_safe(block, prev_block, &mm->free_tree[i], rb) {
> - struct drm_buddy_block *buddy;
> - u64 block_start, block_end;
> + for_each_rb_free_block_reverse_safe(block, prev_block, root, rb) {
> + struct drm_buddy_block *buddy;
> + u64 block_start, block_end;
>
> - if (!block->parent)
> - continue;
> + if (!block->parent)
> + continue;
>
> - block_start = drm_buddy_block_offset(block);
> - block_end = block_start + drm_buddy_block_size(mm, block) - 1;
> + block_start = drm_buddy_block_offset(block);
> + block_end = block_start + drm_buddy_block_size(mm, block) - 1;
>
> - if (!contains(start, end, block_start, block_end))
> - continue;
> + if (!contains(start, end, block_start, block_end))
> + continue;
>
> - buddy = __get_buddy(block);
> - if (!drm_buddy_block_is_free(buddy))
> - continue;
> + buddy = __get_buddy(block);
> + if (!drm_buddy_block_is_free(buddy))
> + continue;
>
> - WARN_ON(drm_buddy_block_is_clear(block) ==
> - drm_buddy_block_is_clear(buddy));
> + WARN_ON(drm_buddy_block_is_clear(block) ==
> + drm_buddy_block_is_clear(buddy));
>
> - /*
> - * If the prev block is same as buddy, don't access the
> - * block in the next iteration as we would free the
> - * buddy block as part of the free function.
> - */
> - if (prev_block && prev_block == buddy) {
> - if (prev_block != first_block)
> - prev_block = rb_entry(rb_prev(&prev_block->rb),
> - struct drm_buddy_block,
> - rb);
> - }
> + /*
> + * If the prev block is same as buddy, don't access the
> + * block in the next iteration as we would free the
> + * buddy block as part of the free function.
> + */
> + if (prev_block && prev_block == buddy) {
> + if (prev_block != rbtree_first_free_block(root))
> + prev_block = rbtree_prev_free_block(&prev_block->rb);
> + }
>
> - rbtree_remove(mm, block);
> - if (drm_buddy_block_is_clear(block))
> - mm->clear_avail -= drm_buddy_block_size(mm, block);
> + rbtree_remove(mm, block);
> + if (drm_buddy_block_is_clear(block))
> + mm->clear_avail -= drm_buddy_block_size(mm, block);
>
> - order = __drm_buddy_free(mm, block, true);
> - if (order >= min_order)
> - return 0;
> + order = __drm_buddy_free(mm, block, true);
> + if (order >= min_order)
> + return 0;
> + }
> }
> }
>
> @@ -311,7 +365,7 @@ static int __force_merge(struct drm_buddy *mm,
> */
> int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
> {
> - unsigned int i;
> + unsigned int i, j;
> u64 offset;
>
> if (size < chunk_size)
> @@ -333,14 +387,16 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
>
> BUG_ON(mm->max_order > DRM_BUDDY_MAX_ORDER);
>
> - mm->free_tree = kmalloc_array(mm->max_order + 1,
> - sizeof(struct rb_root),
> - GFP_KERNEL);
> - if (!mm->free_tree)
> - return -ENOMEM;
> + for (i = 0; i < MAX_FREE_TREES; i++) {
> + mm->free_trees[i] = kmalloc_array(mm->max_order + 1,
> + sizeof(struct rb_root),
> + GFP_KERNEL);
> + if (!mm->free_trees[i])
> + goto out_free_tree;
>
> - for (i = 0; i <= mm->max_order; ++i)
> - mm->free_tree[i] = RB_ROOT;
> + for (j = 0; j <= mm->max_order; ++j)
> + mm->free_trees[i][j] = RB_ROOT;
> + }
>
> mm->n_roots = hweight64(size);
>
> @@ -388,7 +444,8 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
> drm_block_free(mm, mm->roots[i]);
> kfree(mm->roots);
> out_free_tree:
> - kfree(mm->free_tree);
> + while (i--)
> + kfree(mm->free_trees[i]);
> return -ENOMEM;
> }
> EXPORT_SYMBOL(drm_buddy_init);
> @@ -424,8 +481,9 @@ void drm_buddy_fini(struct drm_buddy *mm)
>
> WARN_ON(mm->avail != mm->size);
>
> + for (i = 0; i < MAX_FREE_TREES; i++)
> + kfree(mm->free_trees[i]);
> kfree(mm->roots);
> - kfree(mm->free_tree);
> }
> EXPORT_SYMBOL(drm_buddy_fini);
>
> @@ -449,15 +507,15 @@ static int split_block(struct drm_buddy *mm,
> return -ENOMEM;
> }
>
> - mark_free(mm, block->left);
> - mark_free(mm, block->right);
> -
> if (drm_buddy_block_is_clear(block)) {
> mark_cleared(block->left);
> mark_cleared(block->right);
> clear_reset(block);
> }
>
> + mark_free(mm, block->left);
> + mark_free(mm, block->right);
> +
> mark_split(mm, block);
>
> return 0;
> @@ -491,6 +549,7 @@ EXPORT_SYMBOL(drm_get_buddy);
> */
> void drm_buddy_reset_clear(struct drm_buddy *mm, bool is_clear)
> {
> + enum free_tree src_tree, dst_tree;
> u64 root_size, size, start;
> unsigned int order;
> int i;
> @@ -505,19 +564,24 @@ void drm_buddy_reset_clear(struct drm_buddy *mm, bool is_clear)
> size -= root_size;
> }
>
> + src_tree = is_clear ? DIRTY_TREE : CLEAR_TREE;
> + dst_tree = is_clear ? CLEAR_TREE : DIRTY_TREE;
> +
> for (i = 0; i <= mm->max_order; ++i) {
> + struct rb_root *root = get_root(mm, order, src_tree);
> struct drm_buddy_block *block;
>
> - for_each_rb_free_block_reverse(block, &mm->free_tree[i], rb) {
> - if (is_clear != drm_buddy_block_is_clear(block)) {
> - if (is_clear) {
> - mark_cleared(block);
> - mm->clear_avail += drm_buddy_block_size(mm, block);
> - } else {
> - clear_reset(block);
> - mm->clear_avail -= drm_buddy_block_size(mm, block);
> - }
> + for_each_rb_free_block_reverse(block, root, rb) {
> + rbtree_remove(mm, block);
> + if (is_clear) {
> + mark_cleared(block);
> + mm->clear_avail += drm_buddy_block_size(mm, block);
> + } else {
> + clear_reset(block);
> + mm->clear_avail -= drm_buddy_block_size(mm, block);
> }
> +
> + rbtree_insert(mm, block, dst_tree);
> }
> }
> }
> @@ -707,26 +771,22 @@ __drm_buddy_alloc_range_bias(struct drm_buddy *mm,
> }
>
> static struct drm_buddy_block *
> -get_maxblock(struct drm_buddy *mm, unsigned int order,
> - unsigned long flags)
> +get_maxblock(struct drm_buddy *mm,
> + unsigned int order,
> + enum free_tree tree)
> {
> struct drm_buddy_block *max_block = NULL, *block = NULL;
> + struct rb_root *root;
> unsigned int i;
>
> for (i = order; i <= mm->max_order; ++i) {
> - struct drm_buddy_block *tmp_block;
> -
> - for_each_rb_free_block_reverse(tmp_block, &mm->free_tree[i], rb) {
> - if (block_incompatible(tmp_block, flags))
> + root = get_root(mm, i, tree);
> + if (!rbtree_is_empty(root)) {
> + block = rbtree_last_free_block(root);
> + if (!block)
> continue;
> -
> - block = tmp_block;
> - break;
> }
>
> - if (!block)
> - continue;
> -
> if (!max_block) {
> max_block = block;
> continue;
> @@ -747,36 +807,38 @@ alloc_from_freetree(struct drm_buddy *mm,
> unsigned long flags)
> {
> struct drm_buddy_block *block = NULL;
> + struct rb_root *root;
> + enum free_tree tree;
> unsigned int tmp;
> int err;
>
> + tree = get_tree_for_flags(flags);
> +
> if (flags & DRM_BUDDY_TOPDOWN_ALLOCATION) {
> - block = get_maxblock(mm, order, flags);
> + block = get_maxblock(mm, order, tree);
> if (block)
> /* Store the obtained block order */
> tmp = drm_buddy_block_order(block);
> } else {
> for (tmp = order; tmp <= mm->max_order; ++tmp) {
> - struct drm_buddy_block *tmp_block;
> -
> - for_each_rb_free_block_reverse(tmp_block, &mm->free_tree[tmp], rb) {
> - if (block_incompatible(tmp_block, flags))
> - continue;
> -
> - block = tmp_block;
> - break;
> + /* Get RB tree root for this order and tree */
> + root = get_root(mm, tmp, tree);
> + if (!rbtree_is_empty(root)) {
Do we need this check? rbtree_last_free_block() should just return NULL
for an empty tree anyway. Or, if this emptiness check is somehow cheaper,
maybe we should move it into the helper instead?
> + block = rbtree_last_free_block(root);
> + if (block)
> + break;
> }
> -
> - if (block)
> - break;
> }
> }
>
> if (!block) {
> - /* Fallback method */
> + /* Try allocating from the other tree */
> + tree = (tree == CLEAR_TREE) ? DIRTY_TREE : CLEAR_TREE;
> +
> for (tmp = order; tmp <= mm->max_order; ++tmp) {
> - if (!rbtree_is_empty(mm, tmp)) {
> - block = rbtree_last_entry(mm, tmp);
> + root = get_root(mm, tmp, tree);
> + if (!rbtree_is_empty(root)) {
Here also.
> + block = rbtree_last_free_block(root);
> if (block)
> break;
> }
> @@ -923,6 +985,7 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm,
> u64 rhs_offset, lhs_offset, lhs_size, filled;
> struct drm_buddy_block *block;
> LIST_HEAD(blocks_lhs);
> + enum free_tree tree;
> unsigned long pages;
> unsigned int order;
> u64 modify_size;
> @@ -934,34 +997,39 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm,
> if (order == 0)
> return -ENOSPC;
>
> - if (rbtree_is_empty(mm, order))
> + if (rbtree_is_empty(get_root(mm, order, CLEAR_TREE)) &&
> + rbtree_is_empty(get_root(mm, order, DIRTY_TREE)))
> return -ENOSPC;
>
> - for_each_rb_free_block_reverse(block, &mm->free_tree[order], rb) {
> - /* Allocate blocks traversing RHS */
> - rhs_offset = drm_buddy_block_offset(block);
> - err = __drm_buddy_alloc_range(mm, rhs_offset, size,
> - &filled, blocks);
> - if (!err || err != -ENOSPC)
> - return err;
> -
> - lhs_size = max((size - filled), min_block_size);
> - if (!IS_ALIGNED(lhs_size, min_block_size))
> - lhs_size = round_up(lhs_size, min_block_size);
> -
> - /* Allocate blocks traversing LHS */
> - lhs_offset = drm_buddy_block_offset(block) - lhs_size;
> - err = __drm_buddy_alloc_range(mm, lhs_offset, lhs_size,
> - NULL, &blocks_lhs);
> - if (!err) {
> - list_splice(&blocks_lhs, blocks);
> - return 0;
> - } else if (err != -ENOSPC) {
> + for_each_free_tree(tree) {
> + struct rb_root *root = get_root(mm, order, tree);
> +
> + for_each_rb_free_block_reverse(block, root, rb) {
> + /* Allocate blocks traversing RHS */
> + rhs_offset = drm_buddy_block_offset(block);
> + err = __drm_buddy_alloc_range(mm, rhs_offset, size,
> + &filled, blocks);
> + if (!err || err != -ENOSPC)
> + return err;
> +
> + lhs_size = max((size - filled), min_block_size);
> + if (!IS_ALIGNED(lhs_size, min_block_size))
> + lhs_size = round_up(lhs_size, min_block_size);
> +
> + /* Allocate blocks traversing LHS */
> + lhs_offset = drm_buddy_block_offset(block) - lhs_size;
> + err = __drm_buddy_alloc_range(mm, lhs_offset, lhs_size,
> + NULL, &blocks_lhs);
> + if (!err) {
> + list_splice(&blocks_lhs, blocks);
> + return 0;
> + } else if (err != -ENOSPC) {
> + drm_buddy_free_list_internal(mm, blocks);
> + return err;
> + }
> + /* Free blocks for the next iteration */
> drm_buddy_free_list_internal(mm, blocks);
> - return err;
> }
> - /* Free blocks for the next iteration */
> - drm_buddy_free_list_internal(mm, blocks);
> }
>
> return -ENOSPC;
> @@ -1266,6 +1334,7 @@ EXPORT_SYMBOL(drm_buddy_block_print);
> */
> void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p)
> {
> + enum free_tree tree;
> int order;
>
> drm_printf(p, "chunk_size: %lluKiB, total: %lluMiB, free: %lluMiB, clear_free: %lluMiB\n",
> @@ -1273,11 +1342,16 @@ void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p)
>
> for (order = mm->max_order; order >= 0; order--) {
> struct drm_buddy_block *block;
> + struct rb_root *root;
> u64 count = 0, free;
>
> - for_each_rb_free_block(block, &mm->free_tree[order], rb) {
> - BUG_ON(!drm_buddy_block_is_free(block));
> - count++;
> + for_each_free_tree(tree) {
> + root = get_root(mm, order, tree);
Hmm, actually maybe this helper should just return the root (or both
roots)? That seems to be what all callers want anyway.
> +
> + for_each_rb_free_block(block, root, rb) {
> + BUG_ON(!drm_buddy_block_is_free(block));
> + count++;
> + }
> }
>
> drm_printf(p, "order-%2d ", order);
> diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
> index 091823592034..2fc1cc7b78bf 100644
> --- a/include/drm/drm_buddy.h
> +++ b/include/drm/drm_buddy.h
> @@ -14,6 +14,12 @@
>
> #include <drm/drm_print.h>
>
> +enum free_tree {
> + CLEAR_TREE = 0,
> + DIRTY_TREE,
> + MAX_FREE_TREES,
> +};
> +
> #define range_overflows(start, size, max) ({ \
> typeof(start) start__ = (start); \
> typeof(size) size__ = (size); \
> @@ -73,7 +79,7 @@ struct drm_buddy_block {
> */
> struct drm_buddy {
> /* Maintain a free list for each order. */
> - struct rb_root *free_tree;
> + struct rb_root *free_trees[MAX_FREE_TREES];
>
> /*
> * Maintain explicit binary tree(s) to track the allocation of the
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v3 1/2] drm/buddy: Optimize free block management with RB tree
2025-08-22 8:37 ` [PATCH v3 1/2] " Matthew Auld
@ 2025-08-22 12:28 ` Matthew Auld
2025-08-25 11:40 ` Arunpravin Paneer Selvam
0 siblings, 1 reply; 10+ messages in thread
From: Matthew Auld @ 2025-08-22 12:28 UTC (permalink / raw)
To: Arunpravin Paneer Selvam, christian.koenig, dri-devel, amd-gfx,
intel-gfx, intel-xe
Cc: alexander.deucher
On 22/08/2025 09:37, Matthew Auld wrote:
> On 21/08/2025 16:06, Arunpravin Paneer Selvam wrote:
>> Replace the freelist (O(n)) used for free block management with a
>> red-black tree, providing more efficient O(log n) search, insert,
>> and delete operations. This improves scalability and performance
>> when managing large numbers of free blocks per order (e.g., hundreds
>> or thousands).
>>
>> In the VK-CTS memory stress subtest, the buddy manager merges
>> fragmented memory and inserts freed blocks into the freelist. Since
>> freelist insertion is O(n), this becomes a bottleneck as fragmentation
>> increases. Benchmarking shows list_insert_sorted() consumes ~52.69% CPU
>> with the freelist, compared to just 0.03% with the RB tree
>> (rbtree_insert.isra.0), despite performing the same sorted insert.
>>
>> This also improves performance in heavily fragmented workloads,
>> such as games or graphics tests that stress memory.
>>
>> v3(Matthew):
>> - Remove RB_EMPTY_NODE check in force_merge function.
>> - Rename rb for loop macros to have less generic names and move to
>> .c file.
>> - Make the rb node rb and link field as union.
>>
>> Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
>
> CI is reporting a crash in rb_erase when running the drm_buddy kunit,
> somewhere in drm_test_buddy_alloc_clear it seems.
Found one bug in the second patch:
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -507,6 +507,8 @@ static int split_block(struct drm_buddy *mm,
return -ENOMEM;
}
+ mark_split(mm, block);
+
if (drm_buddy_block_is_clear(block)) {
mark_cleared(block->left);
mark_cleared(block->right);
@@ -516,8 +518,6 @@ static int split_block(struct drm_buddy *mm,
mark_free(mm, block->left);
mark_free(mm, block->right);
- mark_split(mm, block);
-
return 0;
}
Otherwise the mark_split might get the wrong rb root if we reset the
clear state first. Might help with this crash.
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v3 2/2] drm/buddy: Separate clear and dirty free block trees
2025-08-22 12:02 ` Matthew Auld
@ 2025-08-25 11:31 ` Arunpravin Paneer Selvam
2025-08-26 14:32 ` Matthew Auld
0 siblings, 1 reply; 10+ messages in thread
From: Arunpravin Paneer Selvam @ 2025-08-25 11:31 UTC (permalink / raw)
To: Matthew Auld, christian.koenig, dri-devel, amd-gfx, intel-gfx,
intel-xe
Cc: alexander.deucher
On 8/22/2025 5:32 PM, Matthew Auld wrote:
> On 21/08/2025 16:06, Arunpravin Paneer Selvam wrote:
>> Maintain two separate RB trees per order - one for clear (zeroed) blocks
>> and another for dirty (uncleared) blocks. This separation improves
>> code clarity and makes it more obvious which tree is being searched
>> during allocation. It also improves scalability and efficiency when
>> searching for a specific type of block, avoiding unnecessary checks
>> and making the allocator more predictable under fragmentation.
>>
>> The changes have been validated using the existing drm_buddy_test
>> KUnit test cases, along with selected graphics workloads,
>> to ensure correctness and avoid regressions.
>>
>> v2: Missed adding the suggested-by tag. Added it in v2.
>> v3(Matthew):
>> - Remove the double underscores from the internal functions.
>> - Rename the internal functions to have less generic names.
>> - Fix the error handling code.
>> - Pass tree argument for the tree macro.
>> - Use the existing dirty/free bit instead of new tree field.
>> - Make free_trees[] instead of clear_tree and dirty_tree for
>> more cleaner approach.
>>
>> Signed-off-by: Arunpravin Paneer Selvam
>> <Arunpravin.PaneerSelvam@amd.com>
>> Suggested-by: Matthew Auld <matthew.auld@intel.com>
>> Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4260
>> ---
>> drivers/gpu/drm/drm_buddy.c | 342 ++++++++++++++++++++++--------------
>> include/drm/drm_buddy.h | 8 +-
>> 2 files changed, 215 insertions(+), 135 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
>> index 92226a46cc2c..f57c384889a9 100644
>> --- a/drivers/gpu/drm/drm_buddy.c
>> +++ b/drivers/gpu/drm/drm_buddy.c
>> @@ -14,6 +14,9 @@
>> static struct kmem_cache *slab_blocks;
>> +#define for_each_free_tree(tree) \
>> + for ((tree) = CLEAR_TREE; (tree) < MAX_FREE_TREES; (tree)++)
>> +
>> /*
>> * for_each_rb_free_block() - iterate over an RB tree in order
>> * @pos: the struct type * to use as a loop cursor
>> @@ -78,22 +81,77 @@ static void drm_block_free(struct drm_buddy *mm,
>> kmem_cache_free(slab_blocks, block);
>> }
>> +static inline struct rb_root *
>> +get_root(struct drm_buddy *mm,
>> + unsigned int order,
>> + enum free_tree tree)
>> +{
>> + if (tree == CLEAR_TREE)
>> + return &mm->free_trees[CLEAR_TREE][order];
>> + else
>> + return &mm->free_trees[DIRTY_TREE][order];
>
> I think we can replace this with something like:
>
> return &mm->free_trees[tree][order];
Yes, we can replace it with the above.
>
>> +}
>> +
>> +static inline enum free_tree
>> +get_tree_for_block(struct drm_buddy_block *block)
>> +{
>> + return drm_buddy_block_is_clear(block) ? CLEAR_TREE : DIRTY_TREE;
>> +}
>> +
>> +static inline enum free_tree
>> +get_tree_for_flags(unsigned long flags)
>> +{
>> + return (flags & DRM_BUDDY_CLEAR_ALLOCATION) ? CLEAR_TREE :
>> DIRTY_TREE;
>> +}
>> +
>> +static inline struct drm_buddy_block *
>> +rbtree_get_free_block(struct rb_node *node)
>> +{
>> + return node ? rb_entry(node, struct drm_buddy_block, rb) : NULL;
>> +}
>> +
>> +static inline struct drm_buddy_block *
>> +rbtree_prev_free_block(struct rb_node *node)
>> +{
>> + return rbtree_get_free_block(rb_prev(node));
>> +}
>> +
>> +static inline struct drm_buddy_block *
>> +rbtree_first_free_block(struct rb_root *root)
>> +{
>> + return rbtree_get_free_block(rb_first(root));
>> +}
>> +
>> +static inline struct drm_buddy_block *
>> +rbtree_last_free_block(struct rb_root *root)
>> +{
>> + return rbtree_get_free_block(rb_last(root));
>> +}
>> +
>> +static inline bool rbtree_is_empty(struct rb_root *root)
>> +{
>> + return RB_EMPTY_ROOT(root);
>> +}
>> +
>> static void rbtree_insert(struct drm_buddy *mm,
>> - struct drm_buddy_block *block)
>> + struct drm_buddy_block *block,
>> + enum free_tree tree)
>> {
>> - struct rb_root *root =
>> &mm->free_tree[drm_buddy_block_order(block)];
>> - struct rb_node **link = &root->rb_node;
>> - struct rb_node *parent = NULL;
>> + struct rb_node **link, *parent = NULL;
>> struct drm_buddy_block *node;
>> - u64 offset;
>> + struct rb_root *root;
>> + unsigned int order;
>> +
>> + order = drm_buddy_block_order(block);
>> - offset = drm_buddy_block_offset(block);
>> + root = get_root(mm, order, tree);
>> + link = &root->rb_node;
>> while (*link) {
>> parent = *link;
>> - node = rb_entry(parent, struct drm_buddy_block, rb);
>> + node = rbtree_get_free_block(parent);
>> - if (offset < drm_buddy_block_offset(node))
>> + if (drm_buddy_block_offset(block) <
>> drm_buddy_block_offset(node))
>> link = &parent->rb_left;
>> else
>> link = &parent->rb_right;
>> @@ -106,27 +164,19 @@ static void rbtree_insert(struct drm_buddy *mm,
>> static void rbtree_remove(struct drm_buddy *mm,
>> struct drm_buddy_block *block)
>> {
>> + unsigned int order = drm_buddy_block_order(block);
>> + enum free_tree tree;
>> struct rb_root *root;
>> - root = &mm->free_tree[drm_buddy_block_order(block)];
>> + tree = drm_buddy_block_is_clear(block) ?
>> + CLEAR_TREE : DIRTY_TREE;
>> +
>> + root = get_root(mm, order, tree);
>> rb_erase(&block->rb, root);
>> RB_CLEAR_NODE(&block->rb);
>> }
>> -static inline struct drm_buddy_block *
>> -rbtree_last_entry(struct drm_buddy *mm, unsigned int order)
>> -{
>> - struct rb_node *node = rb_last(&mm->free_tree[order]);
>> -
>> - return node ? rb_entry(node, struct drm_buddy_block, rb) : NULL;
>> -}
>> -
>> -static bool rbtree_is_empty(struct drm_buddy *mm, unsigned int order)
>> -{
>> - return RB_EMPTY_ROOT(&mm->free_tree[order]);
>> -}
>> -
>> static void clear_reset(struct drm_buddy_block *block)
>> {
>> block->header &= ~DRM_BUDDY_HEADER_CLEAR;
>> @@ -149,10 +199,14 @@ static void mark_allocated(struct drm_buddy *mm,
>> static void mark_free(struct drm_buddy *mm,
>> struct drm_buddy_block *block)
>> {
>> + enum free_tree tree;
>> +
>> block->header &= ~DRM_BUDDY_HEADER_STATE;
>> block->header |= DRM_BUDDY_FREE;
>> - rbtree_insert(mm, block);
>> + tree = get_tree_for_block(block);
>> +
>> + rbtree_insert(mm, block, tree);
>> }
>> static void mark_split(struct drm_buddy *mm,
>> @@ -238,6 +292,7 @@ static int __force_merge(struct drm_buddy *mm,
>> u64 end,
>> unsigned int min_order)
>> {
>> + enum free_tree tree;
>> unsigned int order;
>> int i;
>> @@ -247,50 +302,49 @@ static int __force_merge(struct drm_buddy *mm,
>> if (min_order > mm->max_order)
>> return -EINVAL;
>> - for (i = min_order - 1; i >= 0; i--) {
>> - struct drm_buddy_block *block, *prev_block, *first_block;
>> -
>> - first_block = rb_entry(rb_first(&mm->free_tree[i]), struct
>> drm_buddy_block, rb);
>> + for_each_free_tree(tree) {
>> + for (i = min_order - 1; i >= 0; i--) {
>> + struct rb_root *root = get_root(mm, i, tree);
>> + struct drm_buddy_block *block, *prev_block;
>> - for_each_rb_free_block_reverse_safe(block, prev_block,
>> &mm->free_tree[i], rb) {
>> - struct drm_buddy_block *buddy;
>> - u64 block_start, block_end;
>> + for_each_rb_free_block_reverse_safe(block, prev_block,
>> root, rb) {
>> + struct drm_buddy_block *buddy;
>> + u64 block_start, block_end;
>> - if (!block->parent)
>> - continue;
>> + if (!block->parent)
>> + continue;
>> - block_start = drm_buddy_block_offset(block);
>> - block_end = block_start + drm_buddy_block_size(mm,
>> block) - 1;
>> + block_start = drm_buddy_block_offset(block);
>> + block_end = block_start + drm_buddy_block_size(mm,
>> block) - 1;
>> - if (!contains(start, end, block_start, block_end))
>> - continue;
>> + if (!contains(start, end, block_start, block_end))
>> + continue;
>> - buddy = __get_buddy(block);
>> - if (!drm_buddy_block_is_free(buddy))
>> - continue;
>> + buddy = __get_buddy(block);
>> + if (!drm_buddy_block_is_free(buddy))
>> + continue;
>> - WARN_ON(drm_buddy_block_is_clear(block) ==
>> - drm_buddy_block_is_clear(buddy));
>> + WARN_ON(drm_buddy_block_is_clear(block) ==
>> + drm_buddy_block_is_clear(buddy));
>> - /*
>> - * If the prev block is same as buddy, don't access the
>> - * block in the next iteration as we would free the
>> - * buddy block as part of the free function.
>> - */
>> - if (prev_block && prev_block == buddy) {
>> - if (prev_block != first_block)
>> - prev_block = rb_entry(rb_prev(&prev_block->rb),
>> - struct drm_buddy_block,
>> - rb);
>> - }
>> + /*
>> + * If the prev block is same as buddy, don't access the
>> + * block in the next iteration as we would free the
>> + * buddy block as part of the free function.
>> + */
>> + if (prev_block && prev_block == buddy) {
>> + if (prev_block != rbtree_first_free_block(root))
>> + prev_block =
>> rbtree_prev_free_block(&prev_block->rb);
>> + }
>> - rbtree_remove(mm, block);
>> - if (drm_buddy_block_is_clear(block))
>> - mm->clear_avail -= drm_buddy_block_size(mm, block);
>> + rbtree_remove(mm, block);
>> + if (drm_buddy_block_is_clear(block))
>> + mm->clear_avail -= drm_buddy_block_size(mm, block);
>> - order = __drm_buddy_free(mm, block, true);
>> - if (order >= min_order)
>> - return 0;
>> + order = __drm_buddy_free(mm, block, true);
>> + if (order >= min_order)
>> + return 0;
>> + }
>> }
>> }
>> @@ -311,7 +365,7 @@ static int __force_merge(struct drm_buddy *mm,
>> */
>> int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
>> {
>> - unsigned int i;
>> + unsigned int i, j;
>> u64 offset;
>> if (size < chunk_size)
>> @@ -333,14 +387,16 @@ int drm_buddy_init(struct drm_buddy *mm, u64
>> size, u64 chunk_size)
>> BUG_ON(mm->max_order > DRM_BUDDY_MAX_ORDER);
>> - mm->free_tree = kmalloc_array(mm->max_order + 1,
>> - sizeof(struct rb_root),
>> - GFP_KERNEL);
>> - if (!mm->free_tree)
>> - return -ENOMEM;
>> + for (i = 0; i < MAX_FREE_TREES; i++) {
>> + mm->free_trees[i] = kmalloc_array(mm->max_order + 1,
>> + sizeof(struct rb_root),
>> + GFP_KERNEL);
>> + if (!mm->free_trees[i])
>> + goto out_free_tree;
>> - for (i = 0; i <= mm->max_order; ++i)
>> - mm->free_tree[i] = RB_ROOT;
>> + for (j = 0; j <= mm->max_order; ++j)
>> + mm->free_trees[i][j] = RB_ROOT;
>> + }
>> mm->n_roots = hweight64(size);
>> @@ -388,7 +444,8 @@ int drm_buddy_init(struct drm_buddy *mm, u64
>> size, u64 chunk_size)
>> drm_block_free(mm, mm->roots[i]);
>> kfree(mm->roots);
>> out_free_tree:
>> - kfree(mm->free_tree);
>> + while (i--)
>> + kfree(mm->free_trees[i]);
>> return -ENOMEM;
>> }
>> EXPORT_SYMBOL(drm_buddy_init);
>> @@ -424,8 +481,9 @@ void drm_buddy_fini(struct drm_buddy *mm)
>> WARN_ON(mm->avail != mm->size);
>> + for (i = 0; i < MAX_FREE_TREES; i++)
>> + kfree(mm->free_trees[i]);
>> kfree(mm->roots);
>> - kfree(mm->free_tree);
>> }
>> EXPORT_SYMBOL(drm_buddy_fini);
>> @@ -449,15 +507,15 @@ static int split_block(struct drm_buddy *mm,
>> return -ENOMEM;
>> }
>> - mark_free(mm, block->left);
>> - mark_free(mm, block->right);
>> -
>> if (drm_buddy_block_is_clear(block)) {
>> mark_cleared(block->left);
>> mark_cleared(block->right);
>> clear_reset(block);
>> }
>> + mark_free(mm, block->left);
>> + mark_free(mm, block->right);
>> +
>> mark_split(mm, block);
>> return 0;
>> @@ -491,6 +549,7 @@ EXPORT_SYMBOL(drm_get_buddy);
>> */
>> void drm_buddy_reset_clear(struct drm_buddy *mm, bool is_clear)
>> {
>> + enum free_tree src_tree, dst_tree;
>> u64 root_size, size, start;
>> unsigned int order;
>> int i;
>> @@ -505,19 +564,24 @@ void drm_buddy_reset_clear(struct drm_buddy
>> *mm, bool is_clear)
>> size -= root_size;
>> }
>> + src_tree = is_clear ? DIRTY_TREE : CLEAR_TREE;
>> + dst_tree = is_clear ? CLEAR_TREE : DIRTY_TREE;
>> +
>> for (i = 0; i <= mm->max_order; ++i) {
>> + struct rb_root *root = get_root(mm, order, src_tree);
>> struct drm_buddy_block *block;
>> - for_each_rb_free_block_reverse(block, &mm->free_tree[i],
>> rb) {
>> - if (is_clear != drm_buddy_block_is_clear(block)) {
>> - if (is_clear) {
>> - mark_cleared(block);
>> - mm->clear_avail += drm_buddy_block_size(mm, block);
>> - } else {
>> - clear_reset(block);
>> - mm->clear_avail -= drm_buddy_block_size(mm, block);
>> - }
>> + for_each_rb_free_block_reverse(block, root, rb) {
>> + rbtree_remove(mm, block);
>> + if (is_clear) {
>> + mark_cleared(block);
>> + mm->clear_avail += drm_buddy_block_size(mm, block);
>> + } else {
>> + clear_reset(block);
>> + mm->clear_avail -= drm_buddy_block_size(mm, block);
>> }
>> +
>> + rbtree_insert(mm, block, dst_tree);
>> }
>> }
>> }
>> @@ -707,26 +771,22 @@ __drm_buddy_alloc_range_bias(struct drm_buddy *mm,
>> }
>> static struct drm_buddy_block *
>> -get_maxblock(struct drm_buddy *mm, unsigned int order,
>> - unsigned long flags)
>> +get_maxblock(struct drm_buddy *mm,
>> + unsigned int order,
>> + enum free_tree tree)
>> {
>> struct drm_buddy_block *max_block = NULL, *block = NULL;
>> + struct rb_root *root;
>> unsigned int i;
>> for (i = order; i <= mm->max_order; ++i) {
>> - struct drm_buddy_block *tmp_block;
>> -
>> - for_each_rb_free_block_reverse(tmp_block, &mm->free_tree[i],
>> rb) {
>> - if (block_incompatible(tmp_block, flags))
>> + root = get_root(mm, i, tree);
>> + if (!rbtree_is_empty(root)) {
>> + block = rbtree_last_free_block(root);
>> + if (!block)
>> continue;
>> -
>> - block = tmp_block;
>> - break;
>> }
>> - if (!block)
>> - continue;
>> -
>> if (!max_block) {
>> max_block = block;
>> continue;
>> @@ -747,36 +807,38 @@ alloc_from_freetree(struct drm_buddy *mm,
>> unsigned long flags)
>> {
>> struct drm_buddy_block *block = NULL;
>> + struct rb_root *root;
>> + enum free_tree tree;
>> unsigned int tmp;
>> int err;
>> + tree = get_tree_for_flags(flags);
>> +
>> if (flags & DRM_BUDDY_TOPDOWN_ALLOCATION) {
>> - block = get_maxblock(mm, order, flags);
>> + block = get_maxblock(mm, order, tree);
>> if (block)
>> /* Store the obtained block order */
>> tmp = drm_buddy_block_order(block);
>> } else {
>> for (tmp = order; tmp <= mm->max_order; ++tmp) {
>> - struct drm_buddy_block *tmp_block;
>> -
>> - for_each_rb_free_block_reverse(tmp_block,
>> &mm->free_tree[tmp], rb) {
>> - if (block_incompatible(tmp_block, flags))
>> - continue;
>> -
>> - block = tmp_block;
>> - break;
>> + /* Get RB tree root for this order and tree */
>> + root = get_root(mm, tmp, tree);
>> + if (!rbtree_is_empty(root)) {
>
> Do we need this check? last_free_block should just return NULL? Or if
> this is somehow a cheaper check, maybe we should move it into the
> helper instead?
Not required. last_free_block will return NULL. I will remove this check.
>
>> + block = rbtree_last_free_block(root);
>> + if (block)
>> + break;
>> }
>> -
>> - if (block)
>> - break;
>> }
>> }
>> if (!block) {
>> - /* Fallback method */
>> + /* Try allocating from the other tree */
>> + tree = (tree == CLEAR_TREE) ? DIRTY_TREE : CLEAR_TREE;
>> +
>> for (tmp = order; tmp <= mm->max_order; ++tmp) {
>> - if (!rbtree_is_empty(mm, tmp)) {
>> - block = rbtree_last_entry(mm, tmp);
>> + root = get_root(mm, tmp, tree);
>> + if (!rbtree_is_empty(root)) {
>
> Here also.
>
>> + block = rbtree_last_free_block(root);
>> if (block)
>> break;
>> }
>> @@ -923,6 +985,7 @@ static int __alloc_contig_try_harder(struct
>> drm_buddy *mm,
>> u64 rhs_offset, lhs_offset, lhs_size, filled;
>> struct drm_buddy_block *block;
>> LIST_HEAD(blocks_lhs);
>> + enum free_tree tree;
>> unsigned long pages;
>> unsigned int order;
>> u64 modify_size;
>> @@ -934,34 +997,39 @@ static int __alloc_contig_try_harder(struct
>> drm_buddy *mm,
>> if (order == 0)
>> return -ENOSPC;
>> - if (rbtree_is_empty(mm, order))
>> + if (rbtree_is_empty(get_root(mm, order, CLEAR_TREE)) &&
>> + rbtree_is_empty(get_root(mm, order, DIRTY_TREE)))
>> return -ENOSPC;
>> - for_each_rb_free_block_reverse(block, &mm->free_tree[order],
>> rb) {
>> - /* Allocate blocks traversing RHS */
>> - rhs_offset = drm_buddy_block_offset(block);
>> - err = __drm_buddy_alloc_range(mm, rhs_offset, size,
>> - &filled, blocks);
>> - if (!err || err != -ENOSPC)
>> - return err;
>> -
>> - lhs_size = max((size - filled), min_block_size);
>> - if (!IS_ALIGNED(lhs_size, min_block_size))
>> - lhs_size = round_up(lhs_size, min_block_size);
>> -
>> - /* Allocate blocks traversing LHS */
>> - lhs_offset = drm_buddy_block_offset(block) - lhs_size;
>> - err = __drm_buddy_alloc_range(mm, lhs_offset, lhs_size,
>> - NULL, &blocks_lhs);
>> - if (!err) {
>> - list_splice(&blocks_lhs, blocks);
>> - return 0;
>> - } else if (err != -ENOSPC) {
>> + for_each_free_tree(tree) {
>> + struct rb_root *root = get_root(mm, order, tree);
>> +
>> + for_each_rb_free_block_reverse(block, root, rb) {
>> + /* Allocate blocks traversing RHS */
>> + rhs_offset = drm_buddy_block_offset(block);
>> + err = __drm_buddy_alloc_range(mm, rhs_offset, size,
>> + &filled, blocks);
>> + if (!err || err != -ENOSPC)
>> + return err;
>> +
>> + lhs_size = max((size - filled), min_block_size);
>> + if (!IS_ALIGNED(lhs_size, min_block_size))
>> + lhs_size = round_up(lhs_size, min_block_size);
>> +
>> + /* Allocate blocks traversing LHS */
>> + lhs_offset = drm_buddy_block_offset(block) - lhs_size;
>> + err = __drm_buddy_alloc_range(mm, lhs_offset, lhs_size,
>> + NULL, &blocks_lhs);
>> + if (!err) {
>> + list_splice(&blocks_lhs, blocks);
>> + return 0;
>> + } else if (err != -ENOSPC) {
>> + drm_buddy_free_list_internal(mm, blocks);
>> + return err;
>> + }
>> + /* Free blocks for the next iteration */
>> drm_buddy_free_list_internal(mm, blocks);
>> - return err;
>> }
>> - /* Free blocks for the next iteration */
>> - drm_buddy_free_list_internal(mm, blocks);
>> }
>> return -ENOSPC;
>> @@ -1266,6 +1334,7 @@ EXPORT_SYMBOL(drm_buddy_block_print);
>> */
>> void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p)
>> {
>> + enum free_tree tree;
>> int order;
>> drm_printf(p, "chunk_size: %lluKiB, total: %lluMiB, free:
>> %lluMiB, clear_free: %lluMiB\n",
>> @@ -1273,11 +1342,16 @@ void drm_buddy_print(struct drm_buddy *mm,
>> struct drm_printer *p)
>> for (order = mm->max_order; order >= 0; order--) {
>> struct drm_buddy_block *block;
>> + struct rb_root *root;
>> u64 count = 0, free;
>> - for_each_rb_free_block(block, &mm->free_tree[order], rb) {
>> - BUG_ON(!drm_buddy_block_is_free(block));
>> - count++;
>> + for_each_free_tree(tree) {
>> + root = get_root(mm, order, tree);
>
> Hmm, actually maybe this helper should just give the root (or both)?
> Seems to be what all users want anyway?
Most of the time, we just need the root, and the functionality determines
the tree (for example: rbtree_insert() or rbtree_remove()).
Maybe we can remove the get_root() and use &mm->free_trees[tree][order]
directly in all the places?
Regards,
Arun.
>
>> +
>> + for_each_rb_free_block(block, root, rb) {
>> + BUG_ON(!drm_buddy_block_is_free(block));
>> + count++;
>> + }
>> }
>> drm_printf(p, "order-%2d ", order);
>> diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
>> index 091823592034..2fc1cc7b78bf 100644
>> --- a/include/drm/drm_buddy.h
>> +++ b/include/drm/drm_buddy.h
>> @@ -14,6 +14,12 @@
>> #include <drm/drm_print.h>
>> +enum free_tree {
>> + CLEAR_TREE = 0,
>> + DIRTY_TREE,
>> + MAX_FREE_TREES,
>> +};
>> +
>> #define range_overflows(start, size, max) ({ \
>> typeof(start) start__ = (start); \
>> typeof(size) size__ = (size); \
>> @@ -73,7 +79,7 @@ struct drm_buddy_block {
>> */
>> struct drm_buddy {
>> /* Maintain a free list for each order. */
>> - struct rb_root *free_tree;
>> + struct rb_root *free_trees[MAX_FREE_TREES];
>> /*
>> * Maintain explicit binary tree(s) to track the allocation of the
>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v3 1/2] drm/buddy: Optimize free block management with RB tree
2025-08-22 12:28 ` Matthew Auld
@ 2025-08-25 11:40 ` Arunpravin Paneer Selvam
0 siblings, 0 replies; 10+ messages in thread
From: Arunpravin Paneer Selvam @ 2025-08-25 11:40 UTC (permalink / raw)
To: Matthew Auld, christian.koenig, dri-devel, amd-gfx, intel-gfx,
intel-xe
Cc: alexander.deucher
On 8/22/2025 5:58 PM, Matthew Auld wrote:
> On 22/08/2025 09:37, Matthew Auld wrote:
>> On 21/08/2025 16:06, Arunpravin Paneer Selvam wrote:
>>> Replace the freelist (O(n)) used for free block management with a
>>> red-black tree, providing more efficient O(log n) search, insert,
>>> and delete operations. This improves scalability and performance
>>> when managing large numbers of free blocks per order (e.g., hundreds
>>> or thousands).
>>>
>>> In the VK-CTS memory stress subtest, the buddy manager merges
>>> fragmented memory and inserts freed blocks into the freelist. Since
>>> freelist insertion is O(n), this becomes a bottleneck as fragmentation
>>> increases. Benchmarking shows list_insert_sorted() consumes ~52.69% CPU
>>> with the freelist, compared to just 0.03% with the RB tree
>>> (rbtree_insert.isra.0), despite performing the same sorted insert.
>>>
>>> This also improves performance in heavily fragmented workloads,
>>> such as games or graphics tests that stress memory.
>>>
>>> v3(Matthew):
>>> - Remove RB_EMPTY_NODE check in force_merge function.
>>> - Rename rb for loop macros to have less generic names and move to
>>> .c file.
>>> - Make the rb node rb and link field as union.
>>>
>>> Signed-off-by: Arunpravin Paneer Selvam
>>> <Arunpravin.PaneerSelvam@amd.com>
>>
>> CI is reporting a crash in rb_erase when running the drm_buddy kunit,
>> somewhere in drm_test_buddy_alloc_clear it seems.
>
> Found one bug in the second patch:
>
> --- a/drivers/gpu/drm/drm_buddy.c
> +++ b/drivers/gpu/drm/drm_buddy.c
> @@ -507,6 +507,8 @@ static int split_block(struct drm_buddy *mm,
> return -ENOMEM;
> }
>
> + mark_split(mm, block);
> +
> if (drm_buddy_block_is_clear(block)) {
> mark_cleared(block->left);
> mark_cleared(block->right);
> @@ -516,8 +518,6 @@ static int split_block(struct drm_buddy *mm,
> mark_free(mm, block->left);
> mark_free(mm, block->right);
>
> - mark_split(mm, block);
> -
> return 0;
> }
>
> Otherwise the mark_split might get the wrong rb root if we reset the
> clear state first. Might help with this crash.
Thanks, I think I missed updating here when we removed the new tree
field and modified it to use the existing dirty/free bit in the
drm_buddy_block struct in v3 patches.
I will add this fix in v4.
Regards,
Arun.
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v3 2/2] drm/buddy: Separate clear and dirty free block trees
2025-08-25 11:31 ` Arunpravin Paneer Selvam
@ 2025-08-26 14:32 ` Matthew Auld
0 siblings, 0 replies; 10+ messages in thread
From: Matthew Auld @ 2025-08-26 14:32 UTC (permalink / raw)
To: Arunpravin Paneer Selvam, christian.koenig, dri-devel, amd-gfx,
intel-gfx, intel-xe
Cc: alexander.deucher
On 25/08/2025 12:31, Arunpravin Paneer Selvam wrote:
>
> On 8/22/2025 5:32 PM, Matthew Auld wrote:
>> On 21/08/2025 16:06, Arunpravin Paneer Selvam wrote:
>>> Maintain two separate RB trees per order - one for clear (zeroed) blocks
>>> and another for dirty (uncleared) blocks. This separation improves
>>> code clarity and makes it more obvious which tree is being searched
>>> during allocation. It also improves scalability and efficiency when
>>> searching for a specific type of block, avoiding unnecessary checks
>>> and making the allocator more predictable under fragmentation.
>>>
>>> The changes have been validated using the existing drm_buddy_test
>>> KUnit test cases, along with selected graphics workloads,
>>> to ensure correctness and avoid regressions.
>>>
>>> v2: Missed adding the suggested-by tag. Added it in v2.
>>> v3(Matthew):
>>> - Remove the double underscores from the internal functions.
>>> - Rename the internal functions to have less generic names.
>>> - Fix the error handling code.
>>> - Pass tree argument for the tree macro.
>>> - Use the existing dirty/free bit instead of new tree field.
>>> - Make free_trees[] instead of clear_tree and dirty_tree for
>>> more cleaner approach.
>>>
>>> Signed-off-by: Arunpravin Paneer Selvam
>>> <Arunpravin.PaneerSelvam@amd.com>
>>> Suggested-by: Matthew Auld <matthew.auld@intel.com>
>>> Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4260
>>> ---
>>> drivers/gpu/drm/drm_buddy.c | 342 ++++++++++++++++++++++--------------
>>> include/drm/drm_buddy.h | 8 +-
>>> 2 files changed, 215 insertions(+), 135 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
>>> index 92226a46cc2c..f57c384889a9 100644
>>> --- a/drivers/gpu/drm/drm_buddy.c
>>> +++ b/drivers/gpu/drm/drm_buddy.c
>>> @@ -14,6 +14,9 @@
>>> static struct kmem_cache *slab_blocks;
>>> +#define for_each_free_tree(tree) \
>>> + for ((tree) = CLEAR_TREE; (tree) < MAX_FREE_TREES; (tree)++)
>>> +
>>> /*
>>> * for_each_rb_free_block() - iterate over an RB tree in order
>>> * @pos: the struct type * to use as a loop cursor
>>> @@ -78,22 +81,77 @@ static void drm_block_free(struct drm_buddy *mm,
>>> kmem_cache_free(slab_blocks, block);
>>> }
>>> +static inline struct rb_root *
>>> +get_root(struct drm_buddy *mm,
>>> + unsigned int order,
>>> + enum free_tree tree)
>>> +{
>>> + if (tree == CLEAR_TREE)
>>> + return &mm->free_trees[CLEAR_TREE][order];
>>> + else
>>> + return &mm->free_trees[DIRTY_TREE][order];
>>
>> I think we can replace this with something like:
>>
>> return &mm->free_trees[tree][order];
> yes. we can replace with the above.
>>
>>> +}
>>> +
>>> +static inline enum free_tree
>>> +get_tree_for_block(struct drm_buddy_block *block)
>>> +{
>>> + return drm_buddy_block_is_clear(block) ? CLEAR_TREE : DIRTY_TREE;
>>> +}
>>> +
>>> +static inline enum free_tree
>>> +get_tree_for_flags(unsigned long flags)
>>> +{
>>> + return (flags & DRM_BUDDY_CLEAR_ALLOCATION) ? CLEAR_TREE :
>>> DIRTY_TREE;
>>> +}
>>> +
>>> +static inline struct drm_buddy_block *
>>> +rbtree_get_free_block(struct rb_node *node)
>>> +{
>>> + return node ? rb_entry(node, struct drm_buddy_block, rb) : NULL;
>>> +}
>>> +
>>> +static inline struct drm_buddy_block *
>>> +rbtree_prev_free_block(struct rb_node *node)
>>> +{
>>> + return rbtree_get_free_block(rb_prev(node));
>>> +}
>>> +
>>> +static inline struct drm_buddy_block *
>>> +rbtree_first_free_block(struct rb_root *root)
>>> +{
>>> + return rbtree_get_free_block(rb_first(root));
>>> +}
>>> +
>>> +static inline struct drm_buddy_block *
>>> +rbtree_last_free_block(struct rb_root *root)
>>> +{
>>> + return rbtree_get_free_block(rb_last(root));
>>> +}
>>> +
>>> +static inline bool rbtree_is_empty(struct rb_root *root)
>>> +{
>>> + return RB_EMPTY_ROOT(root);
>>> +}
>>> +
>>> static void rbtree_insert(struct drm_buddy *mm,
>>> - struct drm_buddy_block *block)
>>> + struct drm_buddy_block *block,
>>> + enum free_tree tree)
>>> {
>>> - struct rb_root *root = &mm-
>>> >free_tree[drm_buddy_block_order(block)];
>>> - struct rb_node **link = &root->rb_node;
>>> - struct rb_node *parent = NULL;
>>> + struct rb_node **link, *parent = NULL;
>>> struct drm_buddy_block *node;
>>> - u64 offset;
>>> + struct rb_root *root;
>>> + unsigned int order;
>>> +
>>> + order = drm_buddy_block_order(block);
>>> - offset = drm_buddy_block_offset(block);
>>> + root = get_root(mm, order, tree);
>>> + link = &root->rb_node;
>>> while (*link) {
>>> parent = *link;
>>> - node = rb_entry(parent, struct drm_buddy_block, rb);
>>> + node = rbtree_get_free_block(parent);
>>> - if (offset < drm_buddy_block_offset(node))
>>> + if (drm_buddy_block_offset(block) <
>>> drm_buddy_block_offset(node))
>>> link = &parent->rb_left;
>>> else
>>> link = &parent->rb_right;
>>> @@ -106,27 +164,19 @@ static void rbtree_insert(struct drm_buddy *mm,
>>> static void rbtree_remove(struct drm_buddy *mm,
>>> struct drm_buddy_block *block)
>>> {
>>> + unsigned int order = drm_buddy_block_order(block);
>>> + enum free_tree tree;
>>> struct rb_root *root;
>>> - root = &mm->free_tree[drm_buddy_block_order(block)];
>>> + tree = drm_buddy_block_is_clear(block) ?
>>> + CLEAR_TREE : DIRTY_TREE;
>>> +
>>> + root = get_root(mm, order, tree);
>>> rb_erase(&block->rb, root);
>>> RB_CLEAR_NODE(&block->rb);
>>> }
>>> -static inline struct drm_buddy_block *
>>> -rbtree_last_entry(struct drm_buddy *mm, unsigned int order)
>>> -{
>>> - struct rb_node *node = rb_last(&mm->free_tree[order]);
>>> -
>>> - return node ? rb_entry(node, struct drm_buddy_block, rb) : NULL;
>>> -}
>>> -
>>> -static bool rbtree_is_empty(struct drm_buddy *mm, unsigned int order)
>>> -{
>>> - return RB_EMPTY_ROOT(&mm->free_tree[order]);
>>> -}
>>> -
>>> static void clear_reset(struct drm_buddy_block *block)
>>> {
>>> block->header &= ~DRM_BUDDY_HEADER_CLEAR;
>>> @@ -149,10 +199,14 @@ static void mark_allocated(struct drm_buddy *mm,
>>> static void mark_free(struct drm_buddy *mm,
>>> struct drm_buddy_block *block)
>>> {
>>> + enum free_tree tree;
>>> +
>>> block->header &= ~DRM_BUDDY_HEADER_STATE;
>>> block->header |= DRM_BUDDY_FREE;
>>> - rbtree_insert(mm, block);
>>> + tree = get_tree_for_block(block);
>>> +
>>> + rbtree_insert(mm, block, tree);
>>> }
>>> static void mark_split(struct drm_buddy *mm,
>>> @@ -238,6 +292,7 @@ static int __force_merge(struct drm_buddy *mm,
>>> u64 end,
>>> unsigned int min_order)
>>> {
>>> + enum free_tree tree;
>>> unsigned int order;
>>> int i;
>>> @@ -247,50 +302,49 @@ static int __force_merge(struct drm_buddy *mm,
>>> if (min_order > mm->max_order)
>>> return -EINVAL;
>>> - for (i = min_order - 1; i >= 0; i--) {
>>> - struct drm_buddy_block *block, *prev_block, *first_block;
>>> -
>>> - first_block = rb_entry(rb_first(&mm->free_tree[i]), struct
>>> drm_buddy_block, rb);
>>> + for_each_free_tree(tree) {
>>> + for (i = min_order - 1; i >= 0; i--) {
>>> + struct rb_root *root = get_root(mm, i, tree);
>>> + struct drm_buddy_block *block, *prev_block;
>>> - for_each_rb_free_block_reverse_safe(block, prev_block,
>>> &mm->free_tree[i], rb) {
>>> - struct drm_buddy_block *buddy;
>>> - u64 block_start, block_end;
>>> + for_each_rb_free_block_reverse_safe(block, prev_block,
>>> root, rb) {
>>> + struct drm_buddy_block *buddy;
>>> + u64 block_start, block_end;
>>> - if (!block->parent)
>>> - continue;
>>> + if (!block->parent)
>>> + continue;
>>> - block_start = drm_buddy_block_offset(block);
>>> - block_end = block_start + drm_buddy_block_size(mm,
>>> block) - 1;
>>> + block_start = drm_buddy_block_offset(block);
>>> + block_end = block_start + drm_buddy_block_size(mm,
>>> block) - 1;
>>> - if (!contains(start, end, block_start, block_end))
>>> - continue;
>>> + if (!contains(start, end, block_start, block_end))
>>> + continue;
>>> - buddy = __get_buddy(block);
>>> - if (!drm_buddy_block_is_free(buddy))
>>> - continue;
>>> + buddy = __get_buddy(block);
>>> + if (!drm_buddy_block_is_free(buddy))
>>> + continue;
>>> - WARN_ON(drm_buddy_block_is_clear(block) ==
>>> - drm_buddy_block_is_clear(buddy));
>>> + WARN_ON(drm_buddy_block_is_clear(block) ==
>>> + drm_buddy_block_is_clear(buddy));
>>> - /*
>>> - * If the prev block is same as buddy, don't access the
>>> - * block in the next iteration as we would free the
>>> - * buddy block as part of the free function.
>>> - */
>>> - if (prev_block && prev_block == buddy) {
>>> - if (prev_block != first_block)
>>> - prev_block = rb_entry(rb_prev(&prev_block->rb),
>>> - struct drm_buddy_block,
>>> - rb);
>>> - }
>>> + /*
>>> + * If the prev block is same as buddy, don't access the
>>> + * block in the next iteration as we would free the
>>> + * buddy block as part of the free function.
>>> + */
>>> + if (prev_block && prev_block == buddy) {
>>> + if (prev_block != rbtree_first_free_block(root))
>>> + prev_block =
>>> rbtree_prev_free_block(&prev_block->rb);
>>> + }
>>> - rbtree_remove(mm, block);
>>> - if (drm_buddy_block_is_clear(block))
>>> - mm->clear_avail -= drm_buddy_block_size(mm, block);
>>> + rbtree_remove(mm, block);
>>> + if (drm_buddy_block_is_clear(block))
>>> + mm->clear_avail -= drm_buddy_block_size(mm, block);
>>> - order = __drm_buddy_free(mm, block, true);
>>> - if (order >= min_order)
>>> - return 0;
>>> + order = __drm_buddy_free(mm, block, true);
>>> + if (order >= min_order)
>>> + return 0;
>>> + }
>>> }
>>> }
>>> @@ -311,7 +365,7 @@ static int __force_merge(struct drm_buddy *mm,
>>> */
>>> int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
>>> {
>>> - unsigned int i;
>>> + unsigned int i, j;
>>> u64 offset;
>>> if (size < chunk_size)
>>> @@ -333,14 +387,16 @@ int drm_buddy_init(struct drm_buddy *mm, u64
>>> size, u64 chunk_size)
>>> BUG_ON(mm->max_order > DRM_BUDDY_MAX_ORDER);
>>> - mm->free_tree = kmalloc_array(mm->max_order + 1,
>>> - sizeof(struct rb_root),
>>> - GFP_KERNEL);
>>> - if (!mm->free_tree)
>>> - return -ENOMEM;
>>> + for (i = 0; i < MAX_FREE_TREES; i++) {
>>> + mm->free_trees[i] = kmalloc_array(mm->max_order + 1,
>>> + sizeof(struct rb_root),
>>> + GFP_KERNEL);
>>> + if (!mm->free_trees[i])
>>> + goto out_free_tree;
>>> - for (i = 0; i <= mm->max_order; ++i)
>>> - mm->free_tree[i] = RB_ROOT;
>>> + for (j = 0; j <= mm->max_order; ++j)
>>> + mm->free_trees[i][j] = RB_ROOT;
>>> + }
>>> mm->n_roots = hweight64(size);
>>> @@ -388,7 +444,8 @@ int drm_buddy_init(struct drm_buddy *mm, u64
>>> size, u64 chunk_size)
>>> drm_block_free(mm, mm->roots[i]);
>>> kfree(mm->roots);
>>> out_free_tree:
>>> - kfree(mm->free_tree);
>>> + while (i--)
>>> + kfree(mm->free_trees[i]);
>>> return -ENOMEM;
>>> }
>>> EXPORT_SYMBOL(drm_buddy_init);
>>> @@ -424,8 +481,9 @@ void drm_buddy_fini(struct drm_buddy *mm)
>>> WARN_ON(mm->avail != mm->size);
>>> + for (i = 0; i < MAX_FREE_TREES; i++)
>>> + kfree(mm->free_trees[i]);
>>> kfree(mm->roots);
>>> - kfree(mm->free_tree);
>>> }
>>> EXPORT_SYMBOL(drm_buddy_fini);
>>> @@ -449,15 +507,15 @@ static int split_block(struct drm_buddy *mm,
>>> return -ENOMEM;
>>> }
>>> - mark_free(mm, block->left);
>>> - mark_free(mm, block->right);
>>> -
>>> if (drm_buddy_block_is_clear(block)) {
>>> mark_cleared(block->left);
>>> mark_cleared(block->right);
>>> clear_reset(block);
>>> }
>>> + mark_free(mm, block->left);
>>> + mark_free(mm, block->right);
>>> +
>>> mark_split(mm, block);
>>> return 0;
>>> @@ -491,6 +549,7 @@ EXPORT_SYMBOL(drm_get_buddy);
>>> */
>>> void drm_buddy_reset_clear(struct drm_buddy *mm, bool is_clear)
>>> {
>>> + enum free_tree src_tree, dst_tree;
>>> u64 root_size, size, start;
>>> unsigned int order;
>>> int i;
>>> @@ -505,19 +564,24 @@ void drm_buddy_reset_clear(struct drm_buddy
>>> *mm, bool is_clear)
>>> size -= root_size;
>>> }
>>> + src_tree = is_clear ? DIRTY_TREE : CLEAR_TREE;
>>> + dst_tree = is_clear ? CLEAR_TREE : DIRTY_TREE;
>>> +
>>> for (i = 0; i <= mm->max_order; ++i) {
>>> + struct rb_root *root = get_root(mm, order, src_tree);
>>> struct drm_buddy_block *block;
>>> - for_each_rb_free_block_reverse(block, &mm->free_tree[i],
>>> rb) {
>>> - if (is_clear != drm_buddy_block_is_clear(block)) {
>>> - if (is_clear) {
>>> - mark_cleared(block);
>>> - mm->clear_avail += drm_buddy_block_size(mm, block);
>>> - } else {
>>> - clear_reset(block);
>>> - mm->clear_avail -= drm_buddy_block_size(mm, block);
>>> - }
>>> + for_each_rb_free_block_reverse(block, root, rb) {
>>> + rbtree_remove(mm, block);
>>> + if (is_clear) {
>>> + mark_cleared(block);
>>> + mm->clear_avail += drm_buddy_block_size(mm, block);
>>> + } else {
>>> + clear_reset(block);
>>> + mm->clear_avail -= drm_buddy_block_size(mm, block);
>>> }
>>> +
>>> + rbtree_insert(mm, block, dst_tree);
>>> }
>>> }
>>> }
>>> @@ -707,26 +771,22 @@ __drm_buddy_alloc_range_bias(struct drm_buddy *mm,
>>> }
>>> static struct drm_buddy_block *
>>> -get_maxblock(struct drm_buddy *mm, unsigned int order,
>>> - unsigned long flags)
>>> +get_maxblock(struct drm_buddy *mm,
>>> + unsigned int order,
>>> + enum free_tree tree)
>>> {
>>> struct drm_buddy_block *max_block = NULL, *block = NULL;
>>> + struct rb_root *root;
>>> unsigned int i;
>>> for (i = order; i <= mm->max_order; ++i) {
>>> - struct drm_buddy_block *tmp_block;
>>> -
>>> - for_each_rb_free_block_reverse(tmp_block, &mm->free_tree[i],
>>> rb) {
>>> - if (block_incompatible(tmp_block, flags))
>>> + root = get_root(mm, i, tree);
>>> + if (!rbtree_is_empty(root)) {
>>> + block = rbtree_last_free_block(root);
>>> + if (!block)
>>> continue;
>>> -
>>> - block = tmp_block;
>>> - break;
>>> }
>>> - if (!block)
>>> - continue;
>>> -
>>> if (!max_block) {
>>> max_block = block;
>>> continue;
>>> @@ -747,36 +807,38 @@ alloc_from_freetree(struct drm_buddy *mm,
>>> unsigned long flags)
>>> {
>>> struct drm_buddy_block *block = NULL;
>>> + struct rb_root *root;
>>> + enum free_tree tree;
>>> unsigned int tmp;
>>> int err;
>>> + tree = get_tree_for_flags(flags);
>>> +
>>> if (flags & DRM_BUDDY_TOPDOWN_ALLOCATION) {
>>> - block = get_maxblock(mm, order, flags);
>>> + block = get_maxblock(mm, order, tree);
>>> if (block)
>>> /* Store the obtained block order */
>>> tmp = drm_buddy_block_order(block);
>>> } else {
>>> for (tmp = order; tmp <= mm->max_order; ++tmp) {
>>> - struct drm_buddy_block *tmp_block;
>>> -
>>> - for_each_rb_free_block_reverse(tmp_block, &mm-
>>> >free_tree[tmp], rb) {
>>> - if (block_incompatible(tmp_block, flags))
>>> - continue;
>>> -
>>> - block = tmp_block;
>>> - break;
>>> + /* Get RB tree root for this order and tree */
>>> + root = get_root(mm, tmp, tree);
>>> + if (!rbtree_is_empty(root)) {
>>
>> Do we need this check? last_free_block should just return NULL? Or if
>> this is somehow a cheaper check, maybe we should move it into the
>> helper instead?
> Not required. last_free_block will return NULL. I will remove this check.
>>
>>> + block = rbtree_last_free_block(root);
>>> + if (block)
>>> + break;
>>> }
>>> -
>>> - if (block)
>>> - break;
>>> }
>>> }
>>> if (!block) {
>>> - /* Fallback method */
>>> + /* Try allocating from the other tree */
>>> + tree = (tree == CLEAR_TREE) ? DIRTY_TREE : CLEAR_TREE;
>>> +
>>> for (tmp = order; tmp <= mm->max_order; ++tmp) {
>>> - if (!rbtree_is_empty(mm, tmp)) {
>>> - block = rbtree_last_entry(mm, tmp);
>>> + root = get_root(mm, tmp, tree);
>>> + if (!rbtree_is_empty(root)) {
>>
>> Here also.
>>
>>> + block = rbtree_last_free_block(root);
>>> if (block)
>>> break;
>>> }
>>> @@ -923,6 +985,7 @@ static int __alloc_contig_try_harder(struct
>>> drm_buddy *mm,
>>> u64 rhs_offset, lhs_offset, lhs_size, filled;
>>> struct drm_buddy_block *block;
>>> LIST_HEAD(blocks_lhs);
>>> + enum free_tree tree;
>>> unsigned long pages;
>>> unsigned int order;
>>> u64 modify_size;
>>> @@ -934,34 +997,39 @@ static int __alloc_contig_try_harder(struct
>>> drm_buddy *mm,
>>> if (order == 0)
>>> return -ENOSPC;
>>> - if (rbtree_is_empty(mm, order))
>>> + if (rbtree_is_empty(get_root(mm, order, CLEAR_TREE)) &&
>>> + rbtree_is_empty(get_root(mm, order, DIRTY_TREE)))
>>> return -ENOSPC;
>>> - for_each_rb_free_block_reverse(block, &mm->free_tree[order],
>>> rb) {
>>> - /* Allocate blocks traversing RHS */
>>> - rhs_offset = drm_buddy_block_offset(block);
>>> - err = __drm_buddy_alloc_range(mm, rhs_offset, size,
>>> - &filled, blocks);
>>> - if (!err || err != -ENOSPC)
>>> - return err;
>>> -
>>> - lhs_size = max((size - filled), min_block_size);
>>> - if (!IS_ALIGNED(lhs_size, min_block_size))
>>> - lhs_size = round_up(lhs_size, min_block_size);
>>> -
>>> - /* Allocate blocks traversing LHS */
>>> - lhs_offset = drm_buddy_block_offset(block) - lhs_size;
>>> - err = __drm_buddy_alloc_range(mm, lhs_offset, lhs_size,
>>> - NULL, &blocks_lhs);
>>> - if (!err) {
>>> - list_splice(&blocks_lhs, blocks);
>>> - return 0;
>>> - } else if (err != -ENOSPC) {
>>> + for_each_free_tree(tree) {
>>> + struct rb_root *root = get_root(mm, order, tree);
>>> +
>>> + for_each_rb_free_block_reverse(block, root, rb) {
>>> + /* Allocate blocks traversing RHS */
>>> + rhs_offset = drm_buddy_block_offset(block);
>>> + err = __drm_buddy_alloc_range(mm, rhs_offset, size,
>>> + &filled, blocks);
>>> + if (!err || err != -ENOSPC)
>>> + return err;
>>> +
>>> + lhs_size = max((size - filled), min_block_size);
>>> + if (!IS_ALIGNED(lhs_size, min_block_size))
>>> + lhs_size = round_up(lhs_size, min_block_size);
>>> +
>>> + /* Allocate blocks traversing LHS */
>>> + lhs_offset = drm_buddy_block_offset(block) - lhs_size;
>>> + err = __drm_buddy_alloc_range(mm, lhs_offset, lhs_size,
>>> + NULL, &blocks_lhs);
>>> + if (!err) {
>>> + list_splice(&blocks_lhs, blocks);
>>> + return 0;
>>> + } else if (err != -ENOSPC) {
>>> + drm_buddy_free_list_internal(mm, blocks);
>>> + return err;
>>> + }
>>> + /* Free blocks for the next iteration */
>>> drm_buddy_free_list_internal(mm, blocks);
>>> - return err;
>>> }
>>> - /* Free blocks for the next iteration */
>>> - drm_buddy_free_list_internal(mm, blocks);
>>> }
>>> return -ENOSPC;
>>> @@ -1266,6 +1334,7 @@ EXPORT_SYMBOL(drm_buddy_block_print);
>>> */
>>> void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p)
>>> {
>>> + enum free_tree tree;
>>> int order;
>>> drm_printf(p, "chunk_size: %lluKiB, total: %lluMiB, free:
>>> %lluMiB, clear_free: %lluMiB\n",
>>> @@ -1273,11 +1342,16 @@ void drm_buddy_print(struct drm_buddy *mm,
>>> struct drm_printer *p)
>>> for (order = mm->max_order; order >= 0; order--) {
>>> struct drm_buddy_block *block;
>>> + struct rb_root *root;
>>> u64 count = 0, free;
>>> - for_each_rb_free_block(block, &mm->free_tree[order], rb) {
>>> - BUG_ON(!drm_buddy_block_is_free(block));
>>> - count++;
>>> + for_each_free_tree(tree) {
>>> + root = get_root(mm, order, tree);
>>
>> Hmm, actually maybe this helper should just give the root (or both)?
>> Seems to be what all users want anyway?
>
> Most of the time, we just need the root, and the functionality determines
> the tree (for example: rbtree_insert() or rbtree_remove()).
>
> Maybe we can remove the get_root() and use &mm->free_trees[tree][order]
> directly in all the places?
Yeah, that might be better.
>
> Regards,
>
> Arun.
>
>>
>>> +
>>> + for_each_rb_free_block(block, root, rb) {
>>> + BUG_ON(!drm_buddy_block_is_free(block));
>>> + count++;
>>> + }
>>> }
>>> drm_printf(p, "order-%2d ", order);
>>> diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
>>> index 091823592034..2fc1cc7b78bf 100644
>>> --- a/include/drm/drm_buddy.h
>>> +++ b/include/drm/drm_buddy.h
>>> @@ -14,6 +14,12 @@
>>> #include <drm/drm_print.h>
>>> +enum free_tree {
>>> + CLEAR_TREE = 0,
>>> + DIRTY_TREE,
>>> + MAX_FREE_TREES,
>>> +};
>>> +
>>> #define range_overflows(start, size, max) ({ \
>>> typeof(start) start__ = (start); \
>>> typeof(size) size__ = (size); \
>>> @@ -73,7 +79,7 @@ struct drm_buddy_block {
>>> */
>>> struct drm_buddy {
>>> /* Maintain a free list for each order. */
>>> - struct rb_root *free_tree;
>>> + struct rb_root *free_trees[MAX_FREE_TREES];
>>> /*
>>> * Maintain explicit binary tree(s) to track the allocation of the
>>
^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2025-08-26 14:32 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-08-21 15:06 [PATCH v3 1/2] drm/buddy: Optimize free block management with RB tree Arunpravin Paneer Selvam
2025-08-21 15:06 ` [PATCH v3 2/2] drm/buddy: Separate clear and dirty free block trees Arunpravin Paneer Selvam
2025-08-22 12:02 ` Matthew Auld
2025-08-25 11:31 ` Arunpravin Paneer Selvam
2025-08-26 14:32 ` Matthew Auld
2025-08-21 17:09 ` ✗ CI.checkpatch: warning for series starting with [v3,1/2] drm/buddy: Optimize free block management with RB tree Patchwork
2025-08-21 17:10 ` ✗ CI.KUnit: failure " Patchwork
2025-08-22 8:37 ` [PATCH v3 1/2] " Matthew Auld
2025-08-22 12:28 ` Matthew Auld
2025-08-25 11:40 ` Arunpravin Paneer Selvam
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).