* [PATCH v5 1/2] drm/buddy: Optimize free block management with RB tree
@ 2025-09-01 18:56 Arunpravin Paneer Selvam
2025-09-01 19:41 ` Peter Zijlstra
2025-09-02 10:23 ` Jani Nikula
0 siblings, 2 replies; 5+ messages in thread
From: Arunpravin Paneer Selvam @ 2025-09-01 18:56 UTC (permalink / raw)
To: christian.koenig, matthew.auld, jani.nikula, peterz, dri-devel,
amd-gfx, intel-gfx, intel-xe, linux-kernel, stable
Cc: alexander.deucher, Arunpravin Paneer Selvam
Replace the freelist (O(n)) used for free block management with a
red-black tree, providing more efficient O(log n) search, insert,
and delete operations. This improves scalability and performance
when managing large numbers of free blocks per order (e.g., hundreds
or thousands).
In the VK-CTS memory stress subtest, the buddy manager merges
fragmented memory and inserts freed blocks into the freelist. Since
freelist insertion is O(n), this becomes a bottleneck as fragmentation
increases. Benchmarking shows list_insert_sorted() consumes ~52.69% CPU
with the freelist, compared to just 0.03% with the RB tree
(rbtree_insert.isra.0), despite performing the same sorted insert.
This also improves performance in heavily fragmented workloads,
such as games or graphics tests that stress memory.
v3(Matthew):
- Remove RB_EMPTY_NODE check in force_merge function.
- Rename rb for loop macros to have less generic names and move to
.c file.
- Make the rb node rb and link field as union.
v4(Jani Nikula):
- The kernel-doc comment should be "/**"
- Move all the rbtree macros to rbtree.h and add parens to ensure
correct precedence.
Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
---
drivers/gpu/drm/drm_buddy.c | 142 ++++++++++++++++++++++--------------
include/drm/drm_buddy.h | 9 ++-
include/linux/rbtree.h | 56 ++++++++++++++
3 files changed, 152 insertions(+), 55 deletions(-)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index a94061f373de..978cabfbcf0f 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -31,6 +31,8 @@ static struct drm_buddy_block *drm_block_alloc(struct drm_buddy *mm,
block->header |= order;
block->parent = parent;
+ RB_CLEAR_NODE(&block->rb);
+
BUG_ON(block->header & DRM_BUDDY_HEADER_UNUSED);
return block;
}
@@ -41,23 +43,53 @@ static void drm_block_free(struct drm_buddy *mm,
kmem_cache_free(slab_blocks, block);
}
-static void list_insert_sorted(struct drm_buddy *mm,
- struct drm_buddy_block *block)
+static void rbtree_insert(struct drm_buddy *mm,
+ struct drm_buddy_block *block)
{
+ struct rb_root *root = &mm->free_tree[drm_buddy_block_order(block)];
+ struct rb_node **link = &root->rb_node;
+ struct rb_node *parent = NULL;
struct drm_buddy_block *node;
- struct list_head *head;
+ u64 offset;
+
+ offset = drm_buddy_block_offset(block);
- head = &mm->free_list[drm_buddy_block_order(block)];
- if (list_empty(head)) {
- list_add(&block->link, head);
- return;
+ while (*link) {
+ parent = *link;
+ node = rb_entry(parent, struct drm_buddy_block, rb);
+
+ if (offset < drm_buddy_block_offset(node))
+ link = &parent->rb_left;
+ else
+ link = &parent->rb_right;
}
- list_for_each_entry(node, head, link)
- if (drm_buddy_block_offset(block) < drm_buddy_block_offset(node))
- break;
+ rb_link_node(&block->rb, parent, link);
+ rb_insert_color(&block->rb, root);
+}
+
+static void rbtree_remove(struct drm_buddy *mm,
+ struct drm_buddy_block *block)
+{
+ struct rb_root *root;
+
+ root = &mm->free_tree[drm_buddy_block_order(block)];
+ rb_erase(&block->rb, root);
- __list_add(&block->link, node->link.prev, &node->link);
+ RB_CLEAR_NODE(&block->rb);
+}
+
+static inline struct drm_buddy_block *
+rbtree_last_entry(struct drm_buddy *mm, unsigned int order)
+{
+ struct rb_node *node = rb_last(&mm->free_tree[order]);
+
+ return node ? rb_entry(node, struct drm_buddy_block, rb) : NULL;
+}
+
+static bool rbtree_is_empty(struct drm_buddy *mm, unsigned int order)
+{
+ return RB_EMPTY_ROOT(&mm->free_tree[order]);
}
static void clear_reset(struct drm_buddy_block *block)
@@ -70,12 +102,13 @@ static void mark_cleared(struct drm_buddy_block *block)
block->header |= DRM_BUDDY_HEADER_CLEAR;
}
-static void mark_allocated(struct drm_buddy_block *block)
+static void mark_allocated(struct drm_buddy *mm,
+ struct drm_buddy_block *block)
{
block->header &= ~DRM_BUDDY_HEADER_STATE;
block->header |= DRM_BUDDY_ALLOCATED;
- list_del(&block->link);
+ rbtree_remove(mm, block);
}
static void mark_free(struct drm_buddy *mm,
@@ -84,15 +117,16 @@ static void mark_free(struct drm_buddy *mm,
block->header &= ~DRM_BUDDY_HEADER_STATE;
block->header |= DRM_BUDDY_FREE;
- list_insert_sorted(mm, block);
+ rbtree_insert(mm, block);
}
-static void mark_split(struct drm_buddy_block *block)
+static void mark_split(struct drm_buddy *mm,
+ struct drm_buddy_block *block)
{
block->header &= ~DRM_BUDDY_HEADER_STATE;
block->header |= DRM_BUDDY_SPLIT;
- list_del(&block->link);
+ rbtree_remove(mm, block);
}
static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2)
@@ -148,7 +182,7 @@ static unsigned int __drm_buddy_free(struct drm_buddy *mm,
mark_cleared(parent);
}
- list_del(&buddy->link);
+ rbtree_remove(mm, buddy);
if (force_merge && drm_buddy_block_is_clear(buddy))
mm->clear_avail -= drm_buddy_block_size(mm, buddy);
@@ -179,9 +213,11 @@ static int __force_merge(struct drm_buddy *mm,
return -EINVAL;
for (i = min_order - 1; i >= 0; i--) {
- struct drm_buddy_block *block, *prev;
+ struct drm_buddy_block *block, *prev_block, *first_block;
+
+ first_block = rb_entry(rb_first(&mm->free_tree[i]), struct drm_buddy_block, rb);
- list_for_each_entry_safe_reverse(block, prev, &mm->free_list[i], link) {
+ rbtree_reverse_for_each_entry_safe(block, prev_block, &mm->free_tree[i], rb) {
struct drm_buddy_block *buddy;
u64 block_start, block_end;
@@ -206,10 +242,14 @@ static int __force_merge(struct drm_buddy *mm,
* block in the next iteration as we would free the
* buddy block as part of the free function.
*/
- if (prev == buddy)
- prev = list_prev_entry(prev, link);
+ if (prev_block && prev_block == buddy) {
+ if (prev_block != first_block)
+ prev_block = rb_entry(rb_prev(&prev_block->rb),
+ struct drm_buddy_block,
+ rb);
+ }
- list_del(&block->link);
+ rbtree_remove(mm, block);
if (drm_buddy_block_is_clear(block))
mm->clear_avail -= drm_buddy_block_size(mm, block);
@@ -258,14 +298,14 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
BUG_ON(mm->max_order > DRM_BUDDY_MAX_ORDER);
- mm->free_list = kmalloc_array(mm->max_order + 1,
- sizeof(struct list_head),
+ mm->free_tree = kmalloc_array(mm->max_order + 1,
+ sizeof(struct rb_root),
GFP_KERNEL);
- if (!mm->free_list)
+ if (!mm->free_tree)
return -ENOMEM;
for (i = 0; i <= mm->max_order; ++i)
- INIT_LIST_HEAD(&mm->free_list[i]);
+ mm->free_tree[i] = RB_ROOT;
mm->n_roots = hweight64(size);
@@ -273,7 +313,7 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
sizeof(struct drm_buddy_block *),
GFP_KERNEL);
if (!mm->roots)
- goto out_free_list;
+ goto out_free_tree;
offset = 0;
i = 0;
@@ -312,8 +352,8 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
while (i--)
drm_block_free(mm, mm->roots[i]);
kfree(mm->roots);
-out_free_list:
- kfree(mm->free_list);
+out_free_tree:
+ kfree(mm->free_tree);
return -ENOMEM;
}
EXPORT_SYMBOL(drm_buddy_init);
@@ -323,7 +363,7 @@ EXPORT_SYMBOL(drm_buddy_init);
*
* @mm: DRM buddy manager to free
*
- * Cleanup memory manager resources and the freelist
+ * Cleanup memory manager resources and the freetree
*/
void drm_buddy_fini(struct drm_buddy *mm)
{
@@ -350,7 +390,7 @@ void drm_buddy_fini(struct drm_buddy *mm)
WARN_ON(mm->avail != mm->size);
kfree(mm->roots);
- kfree(mm->free_list);
+ kfree(mm->free_tree);
}
EXPORT_SYMBOL(drm_buddy_fini);
@@ -383,7 +423,7 @@ static int split_block(struct drm_buddy *mm,
clear_reset(block);
}
- mark_split(block);
+ mark_split(mm, block);
return 0;
}
@@ -412,7 +452,7 @@ EXPORT_SYMBOL(drm_get_buddy);
* @is_clear: blocks clear state
*
* Reset the clear state based on @is_clear value for each block
- * in the freelist.
+ * in the freetree.
*/
void drm_buddy_reset_clear(struct drm_buddy *mm, bool is_clear)
{
@@ -433,7 +473,7 @@ void drm_buddy_reset_clear(struct drm_buddy *mm, bool is_clear)
for (i = 0; i <= mm->max_order; ++i) {
struct drm_buddy_block *block;
- list_for_each_entry_reverse(block, &mm->free_list[i], link) {
+ rbtree_reverse_for_each_entry(block, &mm->free_tree[i], rb) {
if (is_clear != drm_buddy_block_is_clear(block)) {
if (is_clear) {
mark_cleared(block);
@@ -641,7 +681,7 @@ get_maxblock(struct drm_buddy *mm, unsigned int order,
for (i = order; i <= mm->max_order; ++i) {
struct drm_buddy_block *tmp_block;
- list_for_each_entry_reverse(tmp_block, &mm->free_list[i], link) {
+ rbtree_reverse_for_each_entry(tmp_block, &mm->free_tree[i], rb) {
if (block_incompatible(tmp_block, flags))
continue;
@@ -667,7 +707,7 @@ get_maxblock(struct drm_buddy *mm, unsigned int order,
}
static struct drm_buddy_block *
-alloc_from_freelist(struct drm_buddy *mm,
+alloc_from_freetree(struct drm_buddy *mm,
unsigned int order,
unsigned long flags)
{
@@ -684,7 +724,7 @@ alloc_from_freelist(struct drm_buddy *mm,
for (tmp = order; tmp <= mm->max_order; ++tmp) {
struct drm_buddy_block *tmp_block;
- list_for_each_entry_reverse(tmp_block, &mm->free_list[tmp], link) {
+ rbtree_reverse_for_each_entry(tmp_block, &mm->free_tree[tmp], rb) {
if (block_incompatible(tmp_block, flags))
continue;
@@ -700,10 +740,8 @@ alloc_from_freelist(struct drm_buddy *mm,
if (!block) {
/* Fallback method */
for (tmp = order; tmp <= mm->max_order; ++tmp) {
- if (!list_empty(&mm->free_list[tmp])) {
- block = list_last_entry(&mm->free_list[tmp],
- struct drm_buddy_block,
- link);
+ if (!rbtree_is_empty(mm, tmp)) {
+ block = rbtree_last_entry(mm, tmp);
if (block)
break;
}
@@ -771,7 +809,7 @@ static int __alloc_range(struct drm_buddy *mm,
if (contains(start, end, block_start, block_end)) {
if (drm_buddy_block_is_free(block)) {
- mark_allocated(block);
+ mark_allocated(mm, block);
total_allocated += drm_buddy_block_size(mm, block);
mm->avail -= drm_buddy_block_size(mm, block);
if (drm_buddy_block_is_clear(block))
@@ -849,7 +887,6 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm,
{
u64 rhs_offset, lhs_offset, lhs_size, filled;
struct drm_buddy_block *block;
- struct list_head *list;
LIST_HEAD(blocks_lhs);
unsigned long pages;
unsigned int order;
@@ -862,11 +899,10 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm,
if (order == 0)
return -ENOSPC;
- list = &mm->free_list[order];
- if (list_empty(list))
+ if (rbtree_is_empty(mm, order))
return -ENOSPC;
- list_for_each_entry_reverse(block, list, link) {
+ rbtree_reverse_for_each_entry(block, &mm->free_tree[order], rb) {
/* Allocate blocks traversing RHS */
rhs_offset = drm_buddy_block_offset(block);
err = __drm_buddy_alloc_range(mm, rhs_offset, size,
@@ -976,7 +1012,7 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
list_add(&block->tmp_link, &dfs);
err = __alloc_range(mm, &dfs, new_start, new_size, blocks, NULL);
if (err) {
- mark_allocated(block);
+ mark_allocated(mm, block);
mm->avail -= drm_buddy_block_size(mm, block);
if (drm_buddy_block_is_clear(block))
mm->clear_avail -= drm_buddy_block_size(mm, block);
@@ -999,8 +1035,8 @@ __drm_buddy_alloc_blocks(struct drm_buddy *mm,
return __drm_buddy_alloc_range_bias(mm, start, end,
order, flags);
else
- /* Allocate from freelist */
- return alloc_from_freelist(mm, order, flags);
+ /* Allocate from freetree */
+ return alloc_from_freetree(mm, order, flags);
}
/**
@@ -1017,8 +1053,8 @@ __drm_buddy_alloc_blocks(struct drm_buddy *mm,
* alloc_range_bias() called on range limitations, which traverses
* the tree and returns the desired block.
*
- * alloc_from_freelist() called when *no* range restrictions
- * are enforced, which picks the block from the freelist.
+ * alloc_from_freetree() called when *no* range restrictions
+ * are enforced, which picks the block from the freetree.
*
* Returns:
* 0 on success, error code on failure.
@@ -1120,7 +1156,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
}
} while (1);
- mark_allocated(block);
+ mark_allocated(mm, block);
mm->avail -= drm_buddy_block_size(mm, block);
if (drm_buddy_block_is_clear(block))
mm->clear_avail -= drm_buddy_block_size(mm, block);
@@ -1204,7 +1240,7 @@ void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p)
struct drm_buddy_block *block;
u64 count = 0, free;
- list_for_each_entry(block, &mm->free_list[order], link) {
+ rbtree_for_each_entry(block, &mm->free_tree[order], rb) {
BUG_ON(!drm_buddy_block_is_free(block));
count++;
}
diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
index 513837632b7d..091823592034 100644
--- a/include/drm/drm_buddy.h
+++ b/include/drm/drm_buddy.h
@@ -10,6 +10,7 @@
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/sched.h>
+#include <linux/rbtree.h>
#include <drm/drm_print.h>
@@ -53,7 +54,11 @@ struct drm_buddy_block {
* a list, if so desired. As soon as the block is freed with
* drm_buddy_free* ownership is given back to the mm.
*/
- struct list_head link;
+ union {
+ struct rb_node rb;
+ struct list_head link;
+ };
+
struct list_head tmp_link;
};
@@ -68,7 +73,7 @@ struct drm_buddy_block {
*/
struct drm_buddy {
/* Maintain a free list for each order. */
- struct list_head *free_list;
+ struct rb_root *free_tree;
/*
* Maintain explicit binary tree(s) to track the allocation of the
diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h
index 8d2ba3749866..17190bb4837c 100644
--- a/include/linux/rbtree.h
+++ b/include/linux/rbtree.h
@@ -79,6 +79,62 @@ static inline void rb_link_node_rcu(struct rb_node *node, struct rb_node *parent
____ptr ? rb_entry(____ptr, type, member) : NULL; \
})
+/**
+ * rbtree_for_each_entry - iterate in-order over rb_root of given type
+ *
+ * @pos: the 'type *' to use as a loop cursor.
+ * @root: 'rb_root *' of the rbtree.
+ * @member: the name of the rb_node field within 'type'.
+ */
+#define rbtree_for_each_entry(pos, root, member) \
+ for ((pos) = rb_entry_safe(rb_first(root), typeof(*(pos)), member); \
+ (pos); \
+ (pos) = rb_entry_safe(rb_next(&(pos)->member), typeof(*(pos)), member))
+
+/**
+ * rbtree_reverse_for_each_entry - iterate in reverse in-order over rb_root
+ * of given type
+ *
+ * @pos: the 'type *' to use as a loop cursor.
+ * @root: 'rb_root *' of the rbtree.
+ * @member: the name of the rb_node field within 'type'.
+ */
+#define rbtree_reverse_for_each_entry(pos, root, member) \
+ for ((pos) = rb_entry_safe(rb_last(root), typeof(*(pos)), member); \
+ (pos); \
+ (pos) = rb_entry_safe(rb_prev(&(pos)->member), typeof(*(pos)), member))
+
+/**
+ * rbtree_for_each_entry_safe - iterate in-order over rb_root safe against removal
+ *
+ * @pos: the 'type *' to use as a loop cursor
+ * @n: another 'type *' to use as temporary storage
+ * @root: 'rb_root *' of the rbtree
+ * @member: the name of the rb_node field within 'type'
+ */
+#define rbtree_for_each_entry_safe(pos, n, root, member) \
+ for ((pos) = rb_entry_safe(rb_first(root), typeof(*(pos)), member), \
+ (n) = (pos) ? rb_entry_safe(rb_next(&(pos)->member), typeof(*(pos)), member) : NULL; \
+ (pos); \
+ (pos) = (n), \
+ (n) = (pos) ? rb_entry_safe(rb_next(&(pos)->member), typeof(*(pos)), member) : NULL)
+
+/**
+ * rbtree_reverse_for_each_entry_safe - iterate in reverse in-order over rb_root
+ * safe against removal
+ *
+ * @pos: the struct type * to use as a loop cursor.
+ * @n: another struct type * to use as temporary storage.
+ * @root: pointer to struct rb_root to iterate.
+ * @member: name of the rb_node field within the struct.
+ */
+#define rbtree_reverse_for_each_entry_safe(pos, n, root, member) \
+ for ((pos) = rb_entry_safe(rb_last(root), typeof(*(pos)), member), \
+ (n) = (pos) ? rb_entry_safe(rb_prev(&(pos)->member), typeof(*(pos)), member) : NULL; \
+ (pos); \
+ (pos) = (n), \
+ (n) = (pos) ? rb_entry_safe(rb_prev(&(pos)->member), typeof(*(pos)), member) : NULL)
+
/**
* rbtree_postorder_for_each_entry_safe - iterate in post-order over rb_root of
* given type allowing the backing memory of @pos to be invalidated
base-commit: f4c75f975cf50fa2e1fd96c5aafe5aa62e55fbe4
--
2.34.1
* Re: [PATCH v5 1/2] drm/buddy: Optimize free block management with RB tree
2025-09-01 18:56 [PATCH v5 1/2] drm/buddy: Optimize free block management with RB tree Arunpravin Paneer Selvam
@ 2025-09-01 19:41 ` Peter Zijlstra
2025-09-02 6:09 ` Arunpravin Paneer Selvam
2025-09-02 10:23 ` Jani Nikula
1 sibling, 1 reply; 5+ messages in thread
From: Peter Zijlstra @ 2025-09-01 19:41 UTC (permalink / raw)
To: Arunpravin Paneer Selvam
Cc: christian.koenig, matthew.auld, jani.nikula, dri-devel, amd-gfx,
intel-gfx, intel-xe, linux-kernel, stable, alexander.deucher
On Tue, Sep 02, 2025 at 12:26:04AM +0530, Arunpravin Paneer Selvam wrote:
> Replace the freelist (O(n)) used for free block management with a
> red-black tree, providing more efficient O(log n) search, insert,
> and delete operations. This improves scalability and performance
> when managing large numbers of free blocks per order (e.g., hundreds
> or thousands).
Did you consider the interval tree?
> @@ -41,23 +43,53 @@ static void drm_block_free(struct drm_buddy *mm,
> kmem_cache_free(slab_blocks, block);
> }
>
> -static void list_insert_sorted(struct drm_buddy *mm,
> - struct drm_buddy_block *block)
> +static void rbtree_insert(struct drm_buddy *mm,
> + struct drm_buddy_block *block)
> {
> + struct rb_root *root = &mm->free_tree[drm_buddy_block_order(block)];
> + struct rb_node **link = &root->rb_node;
> + struct rb_node *parent = NULL;
> struct drm_buddy_block *node;
> - struct list_head *head;
> + u64 offset;
> +
> + offset = drm_buddy_block_offset(block);
>
> - head = &mm->free_list[drm_buddy_block_order(block)];
> - if (list_empty(head)) {
> - list_add(&block->link, head);
> - return;
> + while (*link) {
> + parent = *link;
> + node = rb_entry(parent, struct drm_buddy_block, rb);
> +
> + if (offset < drm_buddy_block_offset(node))
> + link = &parent->rb_left;
> + else
> + link = &parent->rb_right;
> }
>
> - list_for_each_entry(node, head, link)
> - if (drm_buddy_block_offset(block) < drm_buddy_block_offset(node))
> - break;
> + rb_link_node(&block->rb, parent, link);
> + rb_insert_color(&block->rb, root);
> +}
static inline bool __drm_bb_less(const struct drm_buddy_block *a,
const struct drm_buddy_block *b)
{
return drm_buddy_block_offset(a) < drm_buddy_block_offset(b);
}
#define __node_2_drm_bb(node) rb_entry((node), struct drm_buddy_block, rb)
static inline bool rb_drm_bb_less(struct rb_node *a, const struct rb_node *b)
{
return __drm_bb_less(__node_2_drm_bb(a), __node_2_drm_bb(b));
}
static void rbtree_insert(struct drm_buddy *mm, struct drm_buddy_block *block)
{
rb_add(&block->rb, &mm->free_tree[drm_buddy_block_order(block)], rb_drm_bb_less);
}
> +
> +static void rbtree_remove(struct drm_buddy *mm,
> + struct drm_buddy_block *block)
> +{
> + struct rb_root *root;
> +
> + root = &mm->free_tree[drm_buddy_block_order(block)];
> + rb_erase(&block->rb, root);
>
> - __list_add(&block->link, node->link.prev, &node->link);
> + RB_CLEAR_NODE(&block->rb);
> +}
> +
> +static inline struct drm_buddy_block *
> +rbtree_last_entry(struct drm_buddy *mm, unsigned int order)
> +{
> + struct rb_node *node = rb_last(&mm->free_tree[order]);
> +
> + return node ? rb_entry(node, struct drm_buddy_block, rb) : NULL;
> +}
rb_add_cached() caches the leftmost entry, if you invert the key, the
last is first.
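For illustration, a minimal sketch of that variant, assuming free_tree
were switched to struct rb_root_cached (the _cached helper names below
are made up for this example, not part of the patch):

static bool rb_drm_bb_greater(struct rb_node *a, const struct rb_node *b)
{
	return drm_buddy_block_offset(__node_2_drm_bb(a)) >
	       drm_buddy_block_offset(__node_2_drm_bb(b));
}

static void rbtree_insert_cached(struct drm_buddy *mm,
				 struct drm_buddy_block *block)
{
	/* inverted key: the highest offset becomes the cached leftmost node */
	rb_add_cached(&block->rb,
		      &mm->free_tree[drm_buddy_block_order(block)],
		      rb_drm_bb_greater);
}

static struct drm_buddy_block *
rbtree_last_entry_cached(struct drm_buddy *mm, unsigned int order)
{
	/* O(1) lookup of the highest-offset free block for this order */
	struct rb_node *node = rb_first_cached(&mm->free_tree[order]);

	return node ? __node_2_drm_bb(node) : NULL;
}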
> diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h
> index 8d2ba3749866..17190bb4837c 100644
> --- a/include/linux/rbtree.h
> +++ b/include/linux/rbtree.h
> @@ -79,6 +79,62 @@ static inline void rb_link_node_rcu(struct rb_node *node, struct rb_node *parent
> ____ptr ? rb_entry(____ptr, type, member) : NULL; \
> })
>
> +/**
> + * rbtree_for_each_entry - iterate in-order over rb_root of given type
> + *
> + * @pos: the 'type *' to use as a loop cursor.
> + * @root: 'rb_root *' of the rbtree.
> + * @member: the name of the rb_node field within 'type'.
> + */
> +#define rbtree_for_each_entry(pos, root, member) \
> + for ((pos) = rb_entry_safe(rb_first(root), typeof(*(pos)), member); \
> + (pos); \
> + (pos) = rb_entry_safe(rb_next(&(pos)->member), typeof(*(pos)), member))
> +
> +/**
> + * rbtree_reverse_for_each_entry - iterate in reverse in-order over rb_root
> + * of given type
> + *
> + * @pos: the 'type *' to use as a loop cursor.
> + * @root: 'rb_root *' of the rbtree.
> + * @member: the name of the rb_node field within 'type'.
> + */
> +#define rbtree_reverse_for_each_entry(pos, root, member) \
> + for ((pos) = rb_entry_safe(rb_last(root), typeof(*(pos)), member); \
> + (pos); \
> + (pos) = rb_entry_safe(rb_prev(&(pos)->member), typeof(*(pos)), member))
> +
> +/**
> + * rbtree_for_each_entry_safe - iterate in-order over rb_root safe against removal
> + *
> + * @pos: the 'type *' to use as a loop cursor
> + * @n: another 'type *' to use as temporary storage
> + * @root: 'rb_root *' of the rbtree
> + * @member: the name of the rb_node field within 'type'
> + */
> +#define rbtree_for_each_entry_safe(pos, n, root, member) \
> + for ((pos) = rb_entry_safe(rb_first(root), typeof(*(pos)), member), \
> + (n) = (pos) ? rb_entry_safe(rb_next(&(pos)->member), typeof(*(pos)), member) : NULL; \
> + (pos); \
> + (pos) = (n), \
> + (n) = (pos) ? rb_entry_safe(rb_next(&(pos)->member), typeof(*(pos)), member) : NULL)
> +
> +/**
> + * rbtree_reverse_for_each_entry_safe - iterate in reverse in-order over rb_root
> + * safe against removal
> + *
> + * @pos: the struct type * to use as a loop cursor.
> + * @n: another struct type * to use as temporary storage.
> + * @root: pointer to struct rb_root to iterate.
> + * @member: name of the rb_node field within the struct.
> + */
> +#define rbtree_reverse_for_each_entry_safe(pos, n, root, member) \
> + for ((pos) = rb_entry_safe(rb_last(root), typeof(*(pos)), member), \
> + (n) = (pos) ? rb_entry_safe(rb_prev(&(pos)->member), typeof(*(pos)), member) : NULL; \
> + (pos); \
> + (pos) = (n), \
> + (n) = (pos) ? rb_entry_safe(rb_prev(&(pos)->member), typeof(*(pos)), member) : NULL)
> +
Not really a fan of these. That's typically a sign you're doing it
wrong. Full tree iteration is actually slower than linked list.
* Re: [PATCH v5 1/2] drm/buddy: Optimize free block management with RB tree
2025-09-01 19:41 ` Peter Zijlstra
@ 2025-09-02 6:09 ` Arunpravin Paneer Selvam
0 siblings, 0 replies; 5+ messages in thread
From: Arunpravin Paneer Selvam @ 2025-09-02 6:09 UTC (permalink / raw)
To: Peter Zijlstra
Cc: christian.koenig, matthew.auld, jani.nikula, dri-devel, amd-gfx,
intel-gfx, intel-xe, linux-kernel, stable, alexander.deucher
On 9/2/2025 1:11 AM, Peter Zijlstra wrote:
> On Tue, Sep 02, 2025 at 12:26:04AM +0530, Arunpravin Paneer Selvam wrote:
>> Replace the freelist (O(n)) used for free block management with a
>> red-black tree, providing more efficient O(log n) search, insert,
>> and delete operations. This improves scalability and performance
>> when managing large numbers of free blocks per order (e.g., hundreds
>> or thousands).
> Did you consider the interval tree?
In this allocator, free blocks are tracked individually per order rather
than as arbitrary ranges. The operations are keyed insert/delete/lookup,
for which an rbtree is sufficient and simpler, AFAIK.
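For comparison, a rough sketch of what interval-tree based tracking
would look like with the generic <linux/interval_tree.h> helpers (the
struct and helper names here are hypothetical, only to illustrate the
extra machinery involved):

#include <linux/interval_tree.h>

struct free_range {
	/* rb_node plus start/last plus the augmented __subtree_last */
	struct interval_tree_node it;
};

static void free_range_insert(struct rb_root_cached *root,
			      struct free_range *r,
			      unsigned long start, unsigned long size)
{
	r->it.start = start;
	r->it.last  = start + size - 1;
	interval_tree_insert(&r->it, root);
}

The interval tree is built to answer overlap queries
(interval_tree_iter_first()/interval_tree_iter_next()), which the buddy
allocator never needs; per-order insert/remove keyed on the block offset
is enough, so the plain rbtree keeps both the per-block footprint and
the code smaller.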
>
>
>> @@ -41,23 +43,53 @@ static void drm_block_free(struct drm_buddy *mm,
>> kmem_cache_free(slab_blocks, block);
>> }
>>
>> -static void list_insert_sorted(struct drm_buddy *mm,
>> - struct drm_buddy_block *block)
>> +static void rbtree_insert(struct drm_buddy *mm,
>> + struct drm_buddy_block *block)
>> {
>> + struct rb_root *root = &mm->free_tree[drm_buddy_block_order(block)];
>> + struct rb_node **link = &root->rb_node;
>> + struct rb_node *parent = NULL;
>> struct drm_buddy_block *node;
>> - struct list_head *head;
>> + u64 offset;
>> +
>> + offset = drm_buddy_block_offset(block);
>>
>> - head = &mm->free_list[drm_buddy_block_order(block)];
>> - if (list_empty(head)) {
>> - list_add(&block->link, head);
>> - return;
>> + while (*link) {
>> + parent = *link;
>> + node = rb_entry(parent, struct drm_buddy_block, rb);
>> +
>> + if (offset < drm_buddy_block_offset(node))
>> + link = &parent->rb_left;
>> + else
>> + link = &parent->rb_right;
>> }
>>
>> - list_for_each_entry(node, head, link)
>> - if (drm_buddy_block_offset(block) < drm_buddy_block_offset(node))
>> - break;
>> + rb_link_node(&block->rb, parent, link);
>> + rb_insert_color(&block->rb, root);
>> +}
> static inline bool __drm_bb_less(const struct drm_buddy_block *a,
> const struct drm_buddy_block *b)
> {
> return drm_buddy_block_offset(a) < drm_buddy_block_offset(b);
> }
>
> #define __node_2_drm_bb(node) rb_entry((node), struct drm_buddy_block, rb)
>
> static inline bool rb_drm_bb_less(struct rb_node *a, const struct rb_node *b)
> {
> return __drm_bb_less(__node_2_drm_bb(a), __node_2_drm_bb(b));
> }
>
> static void rbtree_insert(struct drm_buddy *mm, struct drm_buddy_block *block)
> {
> rb_add(&block->rb, &mm->free_tree[drm_buddy_block_order(block)], rb_drm_bb_less);
> }
>
>> +
>> +static void rbtree_remove(struct drm_buddy *mm,
>> + struct drm_buddy_block *block)
>> +{
>> + struct rb_root *root;
>> +
>> + root = &mm->free_tree[drm_buddy_block_order(block)];
>> + rb_erase(&block->rb, root);
>>
>> - __list_add(&block->link, node->link.prev, &node->link);
>> + RB_CLEAR_NODE(&block->rb);
>> +}
>> +
>> +static inline struct drm_buddy_block *
>> +rbtree_last_entry(struct drm_buddy *mm, unsigned int order)
>> +{
>> + struct rb_node *node = rb_last(&mm->free_tree[order]);
>> +
>> + return node ? rb_entry(node, struct drm_buddy_block, rb) : NULL;
>> +}
> rb_add_cached() caches the leftmost entry, if you invert the key, the
> last is first.
With inversion, the in-tree ordering changes from natural ascending
offsets to descending, which can break assumptions in existing buddy
allocator code that expects ascending order.
>
>> diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h
>> index 8d2ba3749866..17190bb4837c 100644
>> --- a/include/linux/rbtree.h
>> +++ b/include/linux/rbtree.h
>> @@ -79,6 +79,62 @@ static inline void rb_link_node_rcu(struct rb_node *node, struct rb_node *parent
>> ____ptr ? rb_entry(____ptr, type, member) : NULL; \
>> })
>>
>> +/**
>> + * rbtree_for_each_entry - iterate in-order over rb_root of given type
>> + *
>> + * @pos: the 'type *' to use as a loop cursor.
>> + * @root: 'rb_root *' of the rbtree.
>> + * @member: the name of the rb_node field within 'type'.
>> + */
>> +#define rbtree_for_each_entry(pos, root, member) \
>> + for ((pos) = rb_entry_safe(rb_first(root), typeof(*(pos)), member); \
>> + (pos); \
>> + (pos) = rb_entry_safe(rb_next(&(pos)->member), typeof(*(pos)), member))
>> +
>> +/**
>> + * rbtree_reverse_for_each_entry - iterate in reverse in-order over rb_root
>> + * of given type
>> + *
>> + * @pos: the 'type *' to use as a loop cursor.
>> + * @root: 'rb_root *' of the rbtree.
>> + * @member: the name of the rb_node field within 'type'.
>> + */
>> +#define rbtree_reverse_for_each_entry(pos, root, member) \
>> + for ((pos) = rb_entry_safe(rb_last(root), typeof(*(pos)), member); \
>> + (pos); \
>> + (pos) = rb_entry_safe(rb_prev(&(pos)->member), typeof(*(pos)), member))
>> +
>> +/**
>> + * rbtree_for_each_entry_safe - iterate in-order over rb_root safe against removal
>> + *
>> + * @pos: the 'type *' to use as a loop cursor
>> + * @n: another 'type *' to use as temporary storage
>> + * @root: 'rb_root *' of the rbtree
>> + * @member: the name of the rb_node field within 'type'
>> + */
>> +#define rbtree_for_each_entry_safe(pos, n, root, member) \
>> + for ((pos) = rb_entry_safe(rb_first(root), typeof(*(pos)), member), \
>> + (n) = (pos) ? rb_entry_safe(rb_next(&(pos)->member), typeof(*(pos)), member) : NULL; \
>> + (pos); \
>> + (pos) = (n), \
>> + (n) = (pos) ? rb_entry_safe(rb_next(&(pos)->member), typeof(*(pos)), member) : NULL)
>> +
>> +/**
>> + * rbtree_reverse_for_each_entry_safe - iterate in reverse in-order over rb_root
>> + * safe against removal
>> + *
>> + * @pos: the struct type * to use as a loop cursor.
>> + * @n: another struct type * to use as temporary storage.
>> + * @root: pointer to struct rb_root to iterate.
>> + * @member: name of the rb_node field within the struct.
>> + */
>> +#define rbtree_reverse_for_each_entry_safe(pos, n, root, member) \
>> + for ((pos) = rb_entry_safe(rb_last(root), typeof(*(pos)), member), \
>> + (n) = (pos) ? rb_entry_safe(rb_prev(&(pos)->member), typeof(*(pos)), member) : NULL; \
>> + (pos); \
>> + (pos) = (n), \
>> + (n) = (pos) ? rb_entry_safe(rb_prev(&(pos)->member), typeof(*(pos)), member) : NULL)
>> +
> Not really a fan of these. That's typically a sign you're doing it
> wrong. Full tree iteration is actually slower than linked list.
I understand your concern about full-tree iteration being slower than a
list walk. In our current use cases, though, the cost is not on the hot
path and performance is comparable to, or even better than, list
traversal. We occasionally need to walk the full set of blocks to
perform specific operations, and these macros make that code simpler and
less error-prone. They aren't meant to replace targeted lookups or
bounded walks, just to cover the cases where a full traversal is
necessary.
Thanks,
Arun.
* Re: [PATCH v5 1/2] drm/buddy: Optimize free block management with RB tree
2025-09-01 18:56 [PATCH v5 1/2] drm/buddy: Optimize free block management with RB tree Arunpravin Paneer Selvam
2025-09-01 19:41 ` Peter Zijlstra
@ 2025-09-02 10:23 ` Jani Nikula
2025-09-02 10:35 ` Arunpravin Paneer Selvam
1 sibling, 1 reply; 5+ messages in thread
From: Jani Nikula @ 2025-09-02 10:23 UTC (permalink / raw)
To: Arunpravin Paneer Selvam, christian.koenig, matthew.auld, peterz,
dri-devel, amd-gfx, intel-gfx, intel-xe, linux-kernel, stable
Cc: alexander.deucher, Arunpravin Paneer Selvam
On Tue, 02 Sep 2025, Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com> wrote:
> Replace the freelist (O(n)) used for free block management with a
> red-black tree, providing more efficient O(log n) search, insert,
> and delete operations. This improves scalability and performance
> when managing large numbers of free blocks per order (e.g., hundreds
> or thousands).
>
> In the VK-CTS memory stress subtest, the buddy manager merges
> fragmented memory and inserts freed blocks into the freelist. Since
> freelist insertion is O(n), this becomes a bottleneck as fragmentation
> increases. Benchmarking shows list_insert_sorted() consumes ~52.69% CPU
> with the freelist, compared to just 0.03% with the RB tree
> (rbtree_insert.isra.0), despite performing the same sorted insert.
>
> This also improves performance in heavily fragmented workloads,
> such as games or graphics tests that stress memory.
>
> v3(Matthew):
> - Remove RB_EMPTY_NODE check in force_merge function.
> - Rename rb for loop macros to have less generic names and move to
> .c file.
> - Make the rb node rb and link field as union.
>
> v4(Jani Nikula):
> - The kernel-doc comment should be "/**"
> - Move all the rbtree macros to rbtree.h and add parens to ensure
> correct precedence.
>
> Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
> ---
> drivers/gpu/drm/drm_buddy.c | 142 ++++++++++++++++++++++--------------
> include/drm/drm_buddy.h | 9 ++-
> include/linux/rbtree.h | 56 ++++++++++++++
> 3 files changed, 152 insertions(+), 55 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
> index a94061f373de..978cabfbcf0f 100644
> --- a/drivers/gpu/drm/drm_buddy.c
> +++ b/drivers/gpu/drm/drm_buddy.c
...
> +static inline struct drm_buddy_block *
> +rbtree_last_entry(struct drm_buddy *mm, unsigned int order)
Drive-by reminder that "inline" in a .c file is, in the absence of evidence
to the contrary, superfluous. Please just let the compiler do its job.
BR,
Jani.
--
Jani Nikula, Intel
* Re: [PATCH v5 1/2] drm/buddy: Optimize free block management with RB tree
2025-09-02 10:23 ` Jani Nikula
@ 2025-09-02 10:35 ` Arunpravin Paneer Selvam
0 siblings, 0 replies; 5+ messages in thread
From: Arunpravin Paneer Selvam @ 2025-09-02 10:35 UTC (permalink / raw)
To: Jani Nikula, christian.koenig, matthew.auld, peterz, dri-devel,
amd-gfx, intel-gfx, intel-xe, linux-kernel, stable
Cc: alexander.deucher
On 9/2/2025 3:53 PM, Jani Nikula wrote:
> On Tue, 02 Sep 2025, Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com> wrote:
>> Replace the freelist (O(n)) used for free block management with a
>> red-black tree, providing more efficient O(log n) search, insert,
>> and delete operations. This improves scalability and performance
>> when managing large numbers of free blocks per order (e.g., hundreds
>> or thousands).
>>
>> In the VK-CTS memory stress subtest, the buddy manager merges
>> fragmented memory and inserts freed blocks into the freelist. Since
>> freelist insertion is O(n), this becomes a bottleneck as fragmentation
>> increases. Benchmarking shows list_insert_sorted() consumes ~52.69% CPU
>> with the freelist, compared to just 0.03% with the RB tree
>> (rbtree_insert.isra.0), despite performing the same sorted insert.
>>
>> This also improves performance in heavily fragmented workloads,
>> such as games or graphics tests that stress memory.
>>
>> v3(Matthew):
>> - Remove RB_EMPTY_NODE check in force_merge function.
>> - Rename rb for loop macros to have less generic names and move to
>> .c file.
>> - Make the rb node rb and link field as union.
>>
>> v4(Jani Nikula):
>> - The kernel-doc comment should be "/**"
>> - Move all the rbtree macros to rbtree.h and add parens to ensure
>> correct precedence.
>>
>> Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
>> ---
>> drivers/gpu/drm/drm_buddy.c | 142 ++++++++++++++++++++++--------------
>> include/drm/drm_buddy.h | 9 ++-
>> include/linux/rbtree.h | 56 ++++++++++++++
>> 3 files changed, 152 insertions(+), 55 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
>> index a94061f373de..978cabfbcf0f 100644
>> --- a/drivers/gpu/drm/drm_buddy.c
>> +++ b/drivers/gpu/drm/drm_buddy.c
> ...
>
>> +static inline struct drm_buddy_block *
>> +rbtree_last_entry(struct drm_buddy *mm, unsigned int order)
> Drive-by reminder that "inline" in a .c file is, in the absence of evidence
> to the contrary, superfluous. Please just let the compiler do its job.
Ah, I missed taking out the inline. Thanks for catching that.
Thanks,
Arun.
>
> BR,
> Jani.
>
>