* [RFC PATCH] drm/buddy: Optimize large alignment handling to avoid unnecessary splits
@ 2025-12-11 12:23 Arunpravin Paneer Selvam
2025-12-11 13:24 ` ✓ i915.CI.BAT: success for " Patchwork
2026-01-09 16:42 ` [RFC PATCH] " Matthew Auld
0 siblings, 2 replies; 6+ messages in thread
From: Arunpravin Paneer Selvam @ 2025-12-11 12:23 UTC (permalink / raw)
To: christian.koenig, matthew.auld, amd-gfx, dri-devel, intel-gfx,
intel-xe
Cc: alexander.deucher, Arunpravin Paneer Selvam
Large alignment requests previously forced the allocator to search by
alignment order, causing large free blocks to be split even when a
smaller aligned range existed within them. This patch switches the
search to prioritize the requested size and uses an augmented RB-tree
field (subtree_max_alignment) to efficiently locate blocks that satisfy
both size and alignment. This prevents unnecessary block splitting and
significantly reduces fragmentation.
A practical example is the VKCTS test
dEQP-VK.memory.allocation.basic.size_8KiB.reverse.count_4000, which
allocates 8 KiB buffers with a 256 KiB alignment. Previously, these
requests caused the allocator to split large blocks despite having
smaller aligned portions within them that could satisfy the allocation.
The new design now identifies and allocates from these portions,
avoiding unnecessary splitting.
Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Suggested-by: Christian König <christian.koenig@amd.com>
---
drivers/gpu/drm/drm_buddy.c | 205 +++++++++++++++++++++++++++++++++---
include/drm/drm_buddy.h | 3 +
2 files changed, 191 insertions(+), 17 deletions(-)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f2c92902e4a3..f749814bb270 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -23,6 +23,18 @@ static struct kmem_cache *slab_blocks;
#define for_each_free_tree(tree) \
for ((tree) = 0; (tree) < DRM_BUDDY_MAX_FREE_TREES; (tree)++)
+static unsigned int drm_buddy_min_offset_or_size_order(struct drm_buddy_block *block)
+{
+ return min_t(unsigned int,
+ __ffs(drm_buddy_block_offset(block)),
+ drm_buddy_block_order(block));
+}
+
+RB_DECLARE_CALLBACKS_MAX(static, drm_buddy_augment_cb,
+ struct drm_buddy_block, rb,
+ unsigned int, subtree_max_alignment,
+ drm_buddy_min_offset_or_size_order);
+
static struct drm_buddy_block *drm_block_alloc(struct drm_buddy *mm,
struct drm_buddy_block *parent,
unsigned int order,
@@ -40,6 +52,9 @@ static struct drm_buddy_block *drm_block_alloc(struct drm_buddy *mm,
block->header |= order;
block->parent = parent;
+ block->subtree_max_alignment =
+ drm_buddy_min_offset_or_size_order(block);
+
RB_CLEAR_NODE(&block->rb);
BUG_ON(block->header & DRM_BUDDY_HEADER_UNUSED);
@@ -76,26 +91,32 @@ static bool rbtree_is_empty(struct rb_root *root)
return RB_EMPTY_ROOT(root);
}
-static bool drm_buddy_block_offset_less(const struct drm_buddy_block *block,
- const struct drm_buddy_block *node)
-{
- return drm_buddy_block_offset(block) < drm_buddy_block_offset(node);
-}
-
-static bool rbtree_block_offset_less(struct rb_node *block,
- const struct rb_node *node)
-{
- return drm_buddy_block_offset_less(rbtree_get_free_block(block),
- rbtree_get_free_block(node));
-}
-
static void rbtree_insert(struct drm_buddy *mm,
struct drm_buddy_block *block,
enum drm_buddy_free_tree tree)
{
- rb_add(&block->rb,
- &mm->free_trees[tree][drm_buddy_block_order(block)],
- rbtree_block_offset_less);
+ struct rb_node **link, *parent = NULL;
+ struct drm_buddy_block *node;
+ struct rb_root *root;
+ unsigned int order;
+
+ order = drm_buddy_block_order(block);
+
+ root = &mm->free_trees[tree][order];
+ link = &root->rb_node;
+
+ while (*link) {
+ parent = *link;
+ node = rbtree_get_free_block(parent);
+
+ if (drm_buddy_block_offset(block) < drm_buddy_block_offset(node))
+ link = &parent->rb_left;
+ else
+ link = &parent->rb_right;
+ }
+
+ rb_link_node(&block->rb, parent, link);
+ rb_insert_augmented(&block->rb, root, &drm_buddy_augment_cb);
}
static void rbtree_remove(struct drm_buddy *mm,
@@ -108,7 +129,7 @@ static void rbtree_remove(struct drm_buddy *mm,
tree = get_block_tree(block);
root = &mm->free_trees[tree][order];
- rb_erase(&block->rb, root);
+ rb_erase_augmented(&block->rb, root, &drm_buddy_augment_cb);
RB_CLEAR_NODE(&block->rb);
}
@@ -596,6 +617,88 @@ static bool block_incompatible(struct drm_buddy_block *block, unsigned int flags
return needs_clear != drm_buddy_block_is_clear(block);
}
+static bool drm_buddy_subtree_can_satisfy(struct rb_node *node,
+ unsigned int alignment)
+{
+ struct drm_buddy_block *block;
+
+ if (!node)
+ return false;
+
+ block = rbtree_get_free_block(node);
+ return block->subtree_max_alignment >= alignment;
+}
+
+static struct drm_buddy_block *
+drm_buddy_find_block_aligned(struct drm_buddy *mm,
+ enum drm_buddy_free_tree tree,
+ unsigned int order,
+ unsigned int tmp,
+ unsigned int alignment,
+ unsigned long flags)
+{
+ struct rb_root *root = &mm->free_trees[tree][tmp];
+ struct rb_node *rb = root->rb_node;
+
+ /* Try to find a block of the requested size that is already aligned */
+ while (rb) {
+ struct drm_buddy_block *block = rbtree_get_free_block(rb);
+ struct rb_node *left_node = rb->rb_left, *right_node = rb->rb_right;
+
+ if (left_node) {
+ if (drm_buddy_subtree_can_satisfy(left_node, alignment)) {
+ rb = left_node;
+ continue;
+ }
+ }
+
+ if (drm_buddy_block_order(block) >= order &&
+ __ffs(drm_buddy_block_offset(block)) >= alignment)
+ return block;
+
+ if (right_node) {
+ if (drm_buddy_subtree_can_satisfy(right_node, alignment)) {
+ rb = right_node;
+ continue;
+ }
+ }
+
+ break;
+ }
+
+ if (tmp < max(order, alignment))
+ return NULL;
+
+ /* If none found, look for a larger block that can satisfy the alignment */
+ rb = root->rb_node;
+ while (rb) {
+ struct drm_buddy_block *block = rbtree_get_free_block(rb);
+ struct rb_node *left_node = rb->rb_left, *right_node = rb->rb_right;
+
+ if (left_node) {
+ if (drm_buddy_subtree_can_satisfy(left_node, alignment)) {
+ rb = left_node;
+ continue;
+ }
+ }
+
+ if (drm_buddy_block_order(block) >= max(order, alignment) &&
+ drm_buddy_min_offset_or_size_order(block) >= alignment)
+ return block;
+
+ if (right_node) {
+ if (drm_buddy_subtree_can_satisfy(right_node, alignment)) {
+ rb = right_node;
+ continue;
+ }
+ }
+
+ break;
+ }
+
+ return NULL;
+}
+
static struct drm_buddy_block *
__alloc_range_bias(struct drm_buddy *mm,
u64 start, u64 end,
@@ -798,6 +901,69 @@ alloc_from_freetree(struct drm_buddy *mm,
return ERR_PTR(err);
}
+static int drm_buddy_offset_aligned_allocation(struct drm_buddy *mm,
+ u64 size,
+ u64 min_block_size,
+ unsigned long flags,
+ struct list_head *blocks)
+{
+ struct drm_buddy_block *block = NULL;
+ unsigned int order, tmp, alignment;
+ enum drm_buddy_free_tree tree;
+ unsigned long pages;
+
+ alignment = ilog2(min_block_size);
+ pages = size >> ilog2(mm->chunk_size);
+ order = fls(pages) - 1;
+
+ tree = (flags & DRM_BUDDY_CLEAR_ALLOCATION) ?
+ DRM_BUDDY_CLEAR_TREE : DRM_BUDDY_DIRTY_TREE;
+
+ for (tmp = order; tmp <= mm->max_order; ++tmp) {
+ block = drm_buddy_find_block_aligned(mm, tree, order,
+ tmp, alignment, flags);
+ if (!block) {
+ tree = (tree == DRM_BUDDY_CLEAR_TREE) ?
+ DRM_BUDDY_DIRTY_TREE : DRM_BUDDY_CLEAR_TREE;
+ block = drm_buddy_find_block_aligned(mm, tree, order,
+ tmp, alignment, flags);
+ }
+
+ if (block)
+ break;
+ }
+
+ if (!block)
+ return -ENOSPC;
+
+ while (drm_buddy_block_order(block) > order) {
+ unsigned int child_order = drm_buddy_block_order(block) - 1;
+ struct drm_buddy_block *left, *right;
+ int r;
+
+ r = split_block(mm, block);
+ if (r)
+ return r;
+
+ left = block->left;
+ right = block->right;
+
+ if (child_order >= alignment)
+ block = right;
+ else
+ block = left;
+ }
+
+ mark_allocated(mm, block);
+ mm->avail -= drm_buddy_block_size(mm, block);
+ if (drm_buddy_block_is_clear(block))
+ mm->clear_avail -= drm_buddy_block_size(mm, block);
+ kmemleak_update_trace(block);
+ list_add_tail(&block->link, blocks);
+
+ return 0;
+}
+
static int __alloc_range(struct drm_buddy *mm,
struct list_head *dfs,
u64 start, u64 size,
@@ -1147,6 +1313,11 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
min_block_size = size;
/* Align size value to min_block_size */
} else if (!IS_ALIGNED(size, min_block_size)) {
+ if (min_block_size > size && is_power_of_2(size))
+ return drm_buddy_offset_aligned_allocation(mm, size,
+ min_block_size,
+ flags,
+ blocks);
size = round_up(size, min_block_size);
}
diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
index d7891d08f67a..da6a40fb4763 100644
--- a/include/drm/drm_buddy.h
+++ b/include/drm/drm_buddy.h
@@ -11,6 +11,7 @@
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/rbtree.h>
+#include <linux/rbtree_augmented.h>
#include <drm/drm_print.h>
@@ -60,6 +61,8 @@ struct drm_buddy_block {
};
struct list_head tmp_link;
+
+ unsigned int subtree_max_alignment;
};
/* Order-zero must be at least SZ_4K */
--
2.34.1
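As a quick illustration of the arithmetic behind the commit message example, the
sketch below reproduces the order/alignment computation of
drm_buddy_offset_aligned_allocation() for the 8 KiB / 256 KiB case in plain
userspace C. The 4 KiB chunk_size and the compiler builtins standing in for the
kernel's ilog2()/fls()/__ffs() helpers are assumptions for illustration, not
part of the patch.

#include <stdio.h>

int main(void)
{
	unsigned long long chunk_size = 4096;              /* order-0 block size (assumed) */
	unsigned long long size = 8ULL << 10;              /* 8 KiB request */
	unsigned long long min_block_size = 256ULL << 10;  /* 256 KiB alignment */

	/* alignment = ilog2(min_block_size) -> 18 (a bit position, in bytes) */
	unsigned int alignment = 63 - __builtin_clzll(min_block_size);
	/* order = fls(size / chunk_size) - 1 -> 1 (an order-1, 8 KiB block) */
	unsigned int order = 63 - __builtin_clzll(size / chunk_size);

	/* The first search loop accepts a free block at byte offset @off when
	 * __ffs(off) >= alignment, i.e. @off has >= 18 trailing zero bits. */
	unsigned long long off = 5ULL * min_block_size;    /* e.g. 1.25 MiB */
	int aligned = (__builtin_ffsll(off) - 1) >= (int)alignment;

	printf("alignment=%u order=%u offset=0x%llx aligned=%d\n",
	       alignment, order, off, aligned);
	return 0;
}

With these numbers the search starts in the order-1 free trees and accepts any
free block whose byte offset has at least 18 trailing zero bits, rather than
searching by the 256 KiB alignment order as before.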
* ✓ i915.CI.BAT: success for drm/buddy: Optimize large alignment handling to avoid unnecessary splits
2025-12-11 12:23 [RFC PATCH] drm/buddy: Optimize large alignment handling to avoid unnecessary splits Arunpravin Paneer Selvam
@ 2025-12-11 13:24 ` Patchwork
2026-01-09 16:42 ` [RFC PATCH] " Matthew Auld
1 sibling, 0 replies; 6+ messages in thread
From: Patchwork @ 2025-12-11 13:24 UTC (permalink / raw)
To: Arunpravin Paneer Selvam; +Cc: intel-gfx
== Series Details ==
Series: drm/buddy: Optimize large alignment handling to avoid unnecessary splits
URL : https://patchwork.freedesktop.org/series/158802/
State : success
== Summary ==
CI Bug Log - changes from CI_DRM_17661 -> Patchwork_158802v1
====================================================
Summary
-------
**SUCCESS**
No regressions found.
External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_158802v1/index.html
Participating hosts (43 -> 41)
------------------------------
Missing (2): bat-dg2-13 fi-snb-2520m
Known issues
------------
Here are the changes found in Patchwork_158802v1 that come from known issues:
### IGT changes ###
#### Issues hit ####
* igt@i915_selftest@live@workarounds:
- bat-dg2-9: [PASS][1] -> [DMESG-FAIL][2] ([i915#12061]) +1 other test dmesg-fail
[1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_17661/bat-dg2-9/igt@i915_selftest@live@workarounds.html
[2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_158802v1/bat-dg2-9/igt@i915_selftest@live@workarounds.html
#### Possible fixes ####
* igt@kms_pm_rpm@basic-rte:
- bat-rpls-4: [DMESG-WARN][3] ([i915#13400]) -> [PASS][4]
[3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_17661/bat-rpls-4/igt@kms_pm_rpm@basic-rte.html
[4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_158802v1/bat-rpls-4/igt@kms_pm_rpm@basic-rte.html
[i915#12061]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/12061
[i915#13400]: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/13400
Build changes
-------------
* Linux: CI_DRM_17661 -> Patchwork_158802v1
CI-20190529: 20190529
CI_DRM_17661: c6ed6720baf0914824cb33b5f003ea3a0b0786da @ git://anongit.freedesktop.org/gfx-ci/linux
IGT_8664: 28cc709ad89c0ef569569f19f4772d4cca354963 @ https://gitlab.freedesktop.org/drm/igt-gpu-tools.git
Patchwork_158802v1: c6ed6720baf0914824cb33b5f003ea3a0b0786da @ git://anongit.freedesktop.org/gfx-ci/linux
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_158802v1/index.html
* Re: [RFC PATCH] drm/buddy: Optimize large alignment handling to avoid unnecessary splits
2025-12-11 12:23 [RFC PATCH] drm/buddy: Optimize large alignment handling to avoid unnecessary splits Arunpravin Paneer Selvam
2025-12-11 13:24 ` ✓ i915.CI.BAT: success for " Patchwork
@ 2026-01-09 16:42 ` Matthew Auld
2026-01-22 14:13 ` Arunpravin Paneer Selvam
1 sibling, 1 reply; 6+ messages in thread
From: Matthew Auld @ 2026-01-09 16:42 UTC (permalink / raw)
To: Arunpravin Paneer Selvam, christian.koenig, amd-gfx, dri-devel,
intel-gfx, intel-xe
Cc: alexander.deucher
On 11/12/2025 12:23, Arunpravin Paneer Selvam wrote:
> Large alignment requests previously forced the allocator to search by
> alignment order, causing large free blocks to be split even when a
> smaller aligned range existed within them. This patch switches the
> search to prioritize the requested size and uses an augmented RB-tree
> field (subtree_max_alignment) to efficiently locate blocks that satisfy
> both size and alignment. This prevents unnecessary block splitting and
> significantly reduces fragmentation.
>
> A practical example is the VKCTS test
> dEQP-VK.memory.allocation.basic.size_8KiB.reverse.count_4000, which
> allocates 8 KiB buffers with a 256 KiB alignment. Previously, these
> requests caused the allocator to split large blocks despite having
> smaller aligned portions within them that could satisfy the allocation.
> The new design now identifies and allocates from these portions,
> avoiding unnecessary splitting.
>
> Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
> Suggested-by: Christian König <christian.koenig@amd.com>
> ---
> drivers/gpu/drm/drm_buddy.c | 205 +++++++++++++++++++++++++++++++++---
> include/drm/drm_buddy.h | 3 +
> 2 files changed, 191 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
> index f2c92902e4a3..f749814bb270 100644
> --- a/drivers/gpu/drm/drm_buddy.c
> +++ b/drivers/gpu/drm/drm_buddy.c
> @@ -23,6 +23,18 @@ static struct kmem_cache *slab_blocks;
> #define for_each_free_tree(tree) \
> for ((tree) = 0; (tree) < DRM_BUDDY_MAX_FREE_TREES; (tree)++)
>
> +static unsigned int drm_buddy_min_offset_or_size_order(struct drm_buddy_block *block)
> +{
> + return min_t(unsigned int,
> + __ffs(drm_buddy_block_offset(block)),
> + drm_buddy_block_order(block));
Didn't quite get this bit. Why do we pick the min between the order and
"alignment"? Say we have an order-zero block but it has 256K addr alignment;
this just selects zero? What is the idea here?
> +}
> +
> +RB_DECLARE_CALLBACKS_MAX(static, drm_buddy_augment_cb,
> + struct drm_buddy_block, rb,
> + unsigned int, subtree_max_alignment,
> + drm_buddy_min_offset_or_size_order);
> +
> static struct drm_buddy_block *drm_block_alloc(struct drm_buddy *mm,
> struct drm_buddy_block *parent,
> unsigned int order,
> @@ -40,6 +52,9 @@ static struct drm_buddy_block *drm_block_alloc(struct drm_buddy *mm,
> block->header |= order;
> block->parent = parent;
>
> + block->subtree_max_alignment =
> + drm_buddy_min_offset_or_size_order(block);
> +
> RB_CLEAR_NODE(&block->rb);
>
> BUG_ON(block->header & DRM_BUDDY_HEADER_UNUSED);
> @@ -76,26 +91,32 @@ static bool rbtree_is_empty(struct rb_root *root)
> return RB_EMPTY_ROOT(root);
> }
>
> -static bool drm_buddy_block_offset_less(const struct drm_buddy_block *block,
> - const struct drm_buddy_block *node)
> -{
> - return drm_buddy_block_offset(block) < drm_buddy_block_offset(node);
> -}
> -
> -static bool rbtree_block_offset_less(struct rb_node *block,
> - const struct rb_node *node)
> -{
> - return drm_buddy_block_offset_less(rbtree_get_free_block(block),
> - rbtree_get_free_block(node));
> -}
> -
> static void rbtree_insert(struct drm_buddy *mm,
> struct drm_buddy_block *block,
> enum drm_buddy_free_tree tree)
> {
> - rb_add(&block->rb,
> - &mm->free_trees[tree][drm_buddy_block_order(block)],
> - rbtree_block_offset_less);
> + struct rb_node **link, *parent = NULL;
> + struct drm_buddy_block *node;
> + struct rb_root *root;
> + unsigned int order;
> +
> + order = drm_buddy_block_order(block);
> +
> + root = &mm->free_trees[tree][order];
> + link = &root->rb_node;
> +
> + while (*link) {
> + parent = *link;
> + node = rbtree_get_free_block(parent);
> +
> + if (drm_buddy_block_offset(block) < drm_buddy_block_offset(node))
> + link = &parent->rb_left;
> + else
> + link = &parent->rb_right;
Is this correct? From the docs it sounds like we are meant to update the
max alignment for each subtree on the path leading up to the insertion?
It looks like insert_augmented() will only do it if there
to be rebalanced.
> + }
> +
> + rb_link_node(&block->rb, parent, link);
> + rb_insert_augmented(&block->rb, root, &drm_buddy_augment_cb);
> }
>
> static void rbtree_remove(struct drm_buddy *mm,
> @@ -108,7 +129,7 @@ static void rbtree_remove(struct drm_buddy *mm,
> tree = get_block_tree(block);
> root = &mm->free_trees[tree][order];
>
> - rb_erase(&block->rb, root);
> + rb_erase_augmented(&block->rb, root, &drm_buddy_augment_cb);
> RB_CLEAR_NODE(&block->rb);
> }
>
> @@ -596,6 +617,88 @@ static bool block_incompatible(struct drm_buddy_block *block, unsigned int flags
> return needs_clear != drm_buddy_block_is_clear(block);
> }
>
> +static bool drm_buddy_subtree_can_satisfy(struct rb_node *node,
> + unsigned int alignment)
> +{
> + struct drm_buddy_block *block;
> +
> + if (!node)
> + return false;
> +
> + block = rbtree_get_free_block(node);
> + return block->subtree_max_alignment >= alignment;
> +}
> +
> +static struct drm_buddy_block *
> +drm_buddy_find_block_aligned(struct drm_buddy *mm,
> + enum drm_buddy_free_tree tree,
> + unsigned int order,
> + unsigned int tmp,
> + unsigned int alignment,
> + unsigned long flags)
> +{
> + struct rb_root *root = &mm->free_trees[tree][tmp];
> + struct rb_node *rb = root->rb_node;
> +
> + /* Try to find a block of the requested size that is already aligned */
> + while (rb) {
> + struct drm_buddy_block *block = rbtree_get_free_block(rb);
> + struct rb_node *left_node = rb->rb_left, *right_node = rb->rb_right;
> +
> + if (left_node) {
> + if (drm_buddy_subtree_can_satisfy(left_node, alignment)) {
> + rb = left_node;
> + continue;
> + }
> + }
> +
> + if (drm_buddy_block_order(block) >= order &&
> + __ffs(drm_buddy_block_offset(block)) >= alignment)
> + return block;
> +
> + if (right_node) {
> + if (drm_buddy_subtree_can_satisfy(right_node, alignment)) {
> + rb = right_node;
> + continue;
> + }
> + }
> +
> + break;
> + }
> +
> + if (tmp < max(order, alignment))
> + return NULL;
> +
> + /* If none found, look for a larger block that can satisfy the alignment */
What is the idea here? IIUC we are looking at some specific order and we
want some min addr alignment; if the above can't find any subtree with a
suitable max alignment then we should bail and try the next order? Why
instead do we do the search again with the same alignment below?
> + rb = root->rb_node;
> + while (rb) {
> + struct drm_buddy_block *block = rbtree_get_free_block(rb);
> + struct rb_node *left_node = rb->rb_left, *right_node = rb->rb_right;
> +
> + if (left_node) {
> + if (drm_buddy_subtree_can_satisfy(left_node, alignment)) {
> + rb = left_node;
> + continue;
> + }
> + }
> +
> + if (drm_buddy_block_order(block) >= max(order, alignment) &&
> + drm_buddy_min_offset_or_size_order(block) >= alignment)
> + return block;
> +
> + if (right_node) {
> + if (drm_buddy_subtree_can_satisfy(right_node, alignment)) {
> + rb = right_node;
> + continue;
> + }
> + }
> +
> + break;
> + }
> +
> + return NULL;
> +}
> +
> static struct drm_buddy_block *
> __alloc_range_bias(struct drm_buddy *mm,
> u64 start, u64 end,
> @@ -798,6 +901,69 @@ alloc_from_freetree(struct drm_buddy *mm,
> return ERR_PTR(err);
> }
>
> +static int drm_buddy_offset_aligned_allocation(struct drm_buddy *mm,
> + u64 size,
> + u64 min_block_size,
> + unsigned long flags,
> + struct list_head *blocks)
> +{
> + struct drm_buddy_block *block = NULL;
> + unsigned int order, tmp, alignment;
> + enum drm_buddy_free_tree tree;
> + unsigned long pages;
> +
> + alignment = ilog2(min_block_size);
> + pages = size >> ilog2(mm->chunk_size);
> + order = fls(pages) - 1;
> +
> + tree = (flags & DRM_BUDDY_CLEAR_ALLOCATION) ?
> + DRM_BUDDY_CLEAR_TREE : DRM_BUDDY_DIRTY_TREE;
> +
> + for (tmp = order; tmp <= mm->max_order; ++tmp) {
> + block = drm_buddy_find_block_aligned(mm, tree, order,
> + tmp, alignment, flags);
> + if (!block) {
> + tree = (tree == DRM_BUDDY_CLEAR_TREE) ?
> + DRM_BUDDY_DIRTY_TREE : DRM_BUDDY_CLEAR_TREE;
> + block = drm_buddy_find_block_aligned(mm, tree, order,
> + tmp, alignment, flags);
> + }
> +
> + if (block)
> + break;
> + }
> +
> + if (!block)
> + return -ENOSPC;
> +
> + while (drm_buddy_block_order(block) > order) {
> + unsigned int child_order = drm_buddy_block_order(block) - 1;
> + struct drm_buddy_block *left, *right;
> + int r;
> +
> + r = split_block(mm, block);
> + if (r)
> + return r;
> +
> + left = block->left;
> + right = block->right;
> +
> + if (child_order >= alignment)
> + block = right;
> + else
> + block = left;
> + }
> +
> + mark_allocated(mm, block);
> + mm->avail -= drm_buddy_block_size(mm, block);
> + if (drm_buddy_block_is_clear(block))
> + mm->clear_avail -= drm_buddy_block_size(mm, block);
> + kmemleak_update_trace(block);
> + list_add_tail(&block->link, blocks);
> +
> + return 0;
> +}
> +
> static int __alloc_range(struct drm_buddy *mm,
> struct list_head *dfs,
> u64 start, u64 size,
> @@ -1147,6 +1313,11 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
> min_block_size = size;
> /* Align size value to min_block_size */
> } else if (!IS_ALIGNED(size, min_block_size)) {
> + if (min_block_size > size && is_power_of_2(size))
> + return drm_buddy_offset_aligned_allocation(mm, size,
> + min_block_size,
> + flags,
> + blocks);
> size = round_up(size, min_block_size);
> }
>
> diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
> index d7891d08f67a..da6a40fb4763 100644
> --- a/include/drm/drm_buddy.h
> +++ b/include/drm/drm_buddy.h
> @@ -11,6 +11,7 @@
> #include <linux/slab.h>
> #include <linux/sched.h>
> #include <linux/rbtree.h>
> +#include <linux/rbtree_augmented.h>
>
> #include <drm/drm_print.h>
>
> @@ -60,6 +61,8 @@ struct drm_buddy_block {
> };
>
> struct list_head tmp_link;
> +
> + unsigned int subtree_max_alignment;
> };
>
> /* Order-zero must be at least SZ_4K */
* Re: [RFC PATCH] drm/buddy: Optimize large alignment handling to avoid unnecessary splits
2026-01-09 16:42 ` [RFC PATCH] " Matthew Auld
@ 2026-01-22 14:13 ` Arunpravin Paneer Selvam
2026-01-22 15:02 ` Matthew Auld
0 siblings, 1 reply; 6+ messages in thread
From: Arunpravin Paneer Selvam @ 2026-01-22 14:13 UTC (permalink / raw)
To: Matthew Auld, christian.koenig, amd-gfx, dri-devel, intel-gfx,
intel-xe
Cc: alexander.deucher
On 09/01/26 22:12, Matthew Auld wrote:
> On 11/12/2025 12:23, Arunpravin Paneer Selvam wrote:
>> Large alignment requests previously forced the allocator to search by
>> alignment order, causing large free blocks to be split even when a
>> smaller aligned range existed within them. This patch switches the
>> search to prioritize the requested size and uses an augmented RB-tree
>> field (subtree_max_alignment) to efficiently locate blocks that satisfy
>> both size and alignment. This prevents unnecessary block splitting and
>> significantly reduces fragmentation.
>>
>> A practical example is the VKCTS test
>> dEQP-VK.memory.allocation.basic.size_8KiB.reverse.count_4000, which
>> allocates 8 KiB buffers with a 256 KiB alignment. Previously, these
>> requests caused the allocator to split large blocks despite having
>> smaller aligned portions within them that could satisfy the allocation.
>> The new design now identifies and allocates from these portions,
>> avoiding unnecessary splitting.
>>
>> Signed-off-by: Arunpravin Paneer Selvam
>> <Arunpravin.PaneerSelvam@amd.com>
>> Suggested-by: Christian König <christian.koenig@amd.com>
>> ---
>> drivers/gpu/drm/drm_buddy.c | 205 +++++++++++++++++++++++++++++++++---
>> include/drm/drm_buddy.h | 3 +
>> 2 files changed, 191 insertions(+), 17 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
>> index f2c92902e4a3..f749814bb270 100644
>> --- a/drivers/gpu/drm/drm_buddy.c
>> +++ b/drivers/gpu/drm/drm_buddy.c
>> @@ -23,6 +23,18 @@ static struct kmem_cache *slab_blocks;
>> #define for_each_free_tree(tree) \
>> for ((tree) = 0; (tree) < DRM_BUDDY_MAX_FREE_TREES; (tree)++)
>> +static unsigned int drm_buddy_min_offset_or_size_order(struct
>> drm_buddy_block *block)
>> +{
>> + return min_t(unsigned int,
>> + __ffs(drm_buddy_block_offset(block)),
>> + drm_buddy_block_order(block));
>
> Didn't quite get this bit. Why do we pick the min between the order
> and "alignment"? Say we have an order-zero block but it has 256K addr
> alignment; this just selects zero? What is the idea here?
Sorry for the confusion. I mixed up two concepts and I have sent the
offset-alignment-only patch. Please have a look.
>
>> +}
>> +
>> +RB_DECLARE_CALLBACKS_MAX(static, drm_buddy_augment_cb,
>> + struct drm_buddy_block, rb,
>> + unsigned int, subtree_max_alignment,
>> + drm_buddy_min_offset_or_size_order);
>> +
>> static struct drm_buddy_block *drm_block_alloc(struct drm_buddy *mm,
>> struct drm_buddy_block *parent,
>> unsigned int order,
>> @@ -40,6 +52,9 @@ static struct drm_buddy_block
>> *drm_block_alloc(struct drm_buddy *mm,
>> block->header |= order;
>> block->parent = parent;
>> + block->subtree_max_alignment =
>> + drm_buddy_min_offset_or_size_order(block);
>> +
>> RB_CLEAR_NODE(&block->rb);
>> BUG_ON(block->header & DRM_BUDDY_HEADER_UNUSED);
>> @@ -76,26 +91,32 @@ static bool rbtree_is_empty(struct rb_root *root)
>> return RB_EMPTY_ROOT(root);
>> }
>> -static bool drm_buddy_block_offset_less(const struct
>> drm_buddy_block *block,
>> - const struct drm_buddy_block *node)
>> -{
>> - return drm_buddy_block_offset(block) <
>> drm_buddy_block_offset(node);
>> -}
>> -
>> -static bool rbtree_block_offset_less(struct rb_node *block,
>> - const struct rb_node *node)
>> -{
>> - return drm_buddy_block_offset_less(rbtree_get_free_block(block),
>> - rbtree_get_free_block(node));
>> -}
>> -
>> static void rbtree_insert(struct drm_buddy *mm,
>> struct drm_buddy_block *block,
>> enum drm_buddy_free_tree tree)
>> {
>> - rb_add(&block->rb,
>> - &mm->free_trees[tree][drm_buddy_block_order(block)],
>> - rbtree_block_offset_less);
>> + struct rb_node **link, *parent = NULL;
>> + struct drm_buddy_block *node;
>> + struct rb_root *root;
>> + unsigned int order;
>> +
>> + order = drm_buddy_block_order(block);
>> +
>> + root = &mm->free_trees[tree][order];
>> + link = &root->rb_node;
>> +
>> + while (*link) {
>> + parent = *link;
>> + node = rbtree_get_free_block(parent);
>> +
>> + if (drm_buddy_block_offset(block) <
>> drm_buddy_block_offset(node))
>> + link = &parent->rb_left;
>> + else
>> + link = &parent->rb_right;
>
> Is this correct? From the docs it sounds like we are meant to update
> the max alignment for each subtree on the path leading up to the
> insertion? It looks like insert_augmented() will only do it if there
> is something to be rebalanced.
AFAIU from the docs, rb_insert_augmented() updates the augmented value
(subtree_max_alignment) for all nodes on the insertion path, not only
when a rotation happens.
>
>> + }
>> +
>> + rb_link_node(&block->rb, parent, link);
>> + rb_insert_augmented(&block->rb, root, &drm_buddy_augment_cb);
>> }
>> static void rbtree_remove(struct drm_buddy *mm,
>> @@ -108,7 +129,7 @@ static void rbtree_remove(struct drm_buddy *mm,
>> tree = get_block_tree(block);
>> root = &mm->free_trees[tree][order];
>> - rb_erase(&block->rb, root);
>> + rb_erase_augmented(&block->rb, root, &drm_buddy_augment_cb);
>> RB_CLEAR_NODE(&block->rb);
>> }
>> @@ -596,6 +617,88 @@ static bool block_incompatible(struct
>> drm_buddy_block *block, unsigned int flags
>> return needs_clear != drm_buddy_block_is_clear(block);
>> }
>> +static bool drm_buddy_subtree_can_satisfy(struct rb_node *node,
>> + unsigned int alignment)
>> +{
>> + struct drm_buddy_block *block;
>> +
>> + if (!node)
>> + return false;
>> +
>> + block = rbtree_get_free_block(node);
>> + return block->subtree_max_alignment >= alignment;
>> +}
>> +
>> +static struct drm_buddy_block *
>> +drm_buddy_find_block_aligned(struct drm_buddy *mm,
>> + enum drm_buddy_free_tree tree,
>> + unsigned int order,
>> + unsigned int tmp,
>> + unsigned int alignment,
>> + unsigned long flags)
>> +{
>> + struct rb_root *root = &mm->free_trees[tree][tmp];
>> + struct rb_node *rb = root->rb_node;
>> +
>> + /* Try to find a block of the requested size that is already
>> aligned */
>> + while (rb) {
>> + struct drm_buddy_block *block = rbtree_get_free_block(rb);
>> + struct rb_node *left_node = rb->rb_left, *right_node =
>> rb->rb_right;
>> +
>> + if (left_node) {
>> + if (drm_buddy_subtree_can_satisfy(left_node, alignment)) {
>> + rb = left_node;
>> + continue;
>> + }
>> + }
>> +
>> + if (drm_buddy_block_order(block) >= order &&
>> + __ffs(drm_buddy_block_offset(block)) >= alignment)
>> + return block;
>> +
>> + if (right_node) {
>> + if (drm_buddy_subtree_can_satisfy(right_node, alignment)) {
>> + rb = right_node;
>> + continue;
>> + }
>> + }
>> +
>> + break;
>> + }
>> +
>> + if (tmp < max(order, alignment))
>> + return NULL;
>> +
>> + /* If none found, look for a larger block that can satisfy the
>> alignment */
>
> What is the idea here? IIUC we are looking at some specific order and
> we want some min addr alignment; if the above can't find any subtree
> with a suitable max alignment then we should bail and try the next
> order? Why instead do we do the search again with the same alignment
> below?
Same as above, I mixed up two concepts. Please review v1 of the
offset-aligned allocation patch.
Regards,
Arun.
>
>> + rb = root->rb_node;
>> + while (rb) {
>> + struct drm_buddy_block *block = rbtree_get_free_block(rb);
>> + struct rb_node *left_node = rb->rb_left, *right_node =
>> rb->rb_right;
>> +
>> + if (left_node) {
>> + if (drm_buddy_subtree_can_satisfy(left_node, alignment)) {
>> + rb = left_node;
>> + continue;
>> + }
>> + }
>> +
>> + if (drm_buddy_block_order(block) >= max(order, alignment) &&
>> + drm_buddy_min_offset_or_size_order(block) >= alignment)
>> + return block;
>> +
>> + if (right_node) {
>> + if (drm_buddy_subtree_can_satisfy(right_node, alignment)) {
>> + rb = right_node;
>> + continue;
>> + }
>> + }
>> +
>> + break;
>> + }
>> +
>> + return NULL;
>> +}
>> +
>> static struct drm_buddy_block *
>> __alloc_range_bias(struct drm_buddy *mm,
>> u64 start, u64 end,
>> @@ -798,6 +901,69 @@ alloc_from_freetree(struct drm_buddy *mm,
>> return ERR_PTR(err);
>> }
>> +static int drm_buddy_offset_aligned_allocation(struct drm_buddy *mm,
>> + u64 size,
>> + u64 min_block_size,
>> + unsigned long flags,
>> + struct list_head *blocks)
>> +{
>> + struct drm_buddy_block *block = NULL;
>> + unsigned int order, tmp, alignment;
>> + enum drm_buddy_free_tree tree;
>> + unsigned long pages;
>> +
>> + alignment = ilog2(min_block_size);
>> + pages = size >> ilog2(mm->chunk_size);
>> + order = fls(pages) - 1;
>> +
>> + tree = (flags & DRM_BUDDY_CLEAR_ALLOCATION) ?
>> + DRM_BUDDY_CLEAR_TREE : DRM_BUDDY_DIRTY_TREE;
>> +
>> + for (tmp = order; tmp <= mm->max_order; ++tmp) {
>> + block = drm_buddy_find_block_aligned(mm, tree, order,
>> + tmp, alignment, flags);
>> + if (!block) {
>> + tree = (tree == DRM_BUDDY_CLEAR_TREE) ?
>> + DRM_BUDDY_DIRTY_TREE : DRM_BUDDY_CLEAR_TREE;
>> + block = drm_buddy_find_block_aligned(mm, tree, order,
>> + tmp, alignment, flags);
>> + }
>> +
>> + if (block)
>> + break;
>> + }
>> +
>> + if (!block)
>> + return -ENOSPC;
>> +
>> + while (drm_buddy_block_order(block) > order) {
>> + unsigned int child_order = drm_buddy_block_order(block) - 1;
>> + struct drm_buddy_block *left, *right;
>> + int r;
>> +
>> + r = split_block(mm, block);
>> + if (r)
>> + return r;
>> +
>> + left = block->left;
>> + right = block->right;
>> +
>> + if (child_order >= alignment)
>> + block = right;
>> + else
>> + block = left;
>> + }
>> +
>> + mark_allocated(mm, block);
>> + mm->avail -= drm_buddy_block_size(mm, block);
>> + if (drm_buddy_block_is_clear(block))
>> + mm->clear_avail -= drm_buddy_block_size(mm, block);
>> + kmemleak_update_trace(block);
>> + list_add_tail(&block->link, blocks);
>> +
>> + return 0;
>> +}
>> +
>> static int __alloc_range(struct drm_buddy *mm,
>> struct list_head *dfs,
>> u64 start, u64 size,
>> @@ -1147,6 +1313,11 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>> min_block_size = size;
>> /* Align size value to min_block_size */
>> } else if (!IS_ALIGNED(size, min_block_size)) {
>> + if (min_block_size > size && is_power_of_2(size))
>> + return drm_buddy_offset_aligned_allocation(mm, size,
>> + min_block_size,
>> + flags,
>> + blocks);
>> size = round_up(size, min_block_size);
>> }
>> diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
>> index d7891d08f67a..da6a40fb4763 100644
>> --- a/include/drm/drm_buddy.h
>> +++ b/include/drm/drm_buddy.h
>> @@ -11,6 +11,7 @@
>> #include <linux/slab.h>
>> #include <linux/sched.h>
>> #include <linux/rbtree.h>
>> +#include <linux/rbtree_augmented.h>
>> #include <drm/drm_print.h>
>> @@ -60,6 +61,8 @@ struct drm_buddy_block {
>> };
>> struct list_head tmp_link;
>> +
>> + unsigned int subtree_max_alignment;
>> };
>> /* Order-zero must be at least SZ_4K */
>
* Re: [RFC PATCH] drm/buddy: Optimize large alignment handling to avoid unnecessary splits
2026-01-22 14:13 ` Arunpravin Paneer Selvam
@ 2026-01-22 15:02 ` Matthew Auld
2026-01-29 13:19 ` Arunpravin Paneer Selvam
0 siblings, 1 reply; 6+ messages in thread
From: Matthew Auld @ 2026-01-22 15:02 UTC (permalink / raw)
To: Arunpravin Paneer Selvam, christian.koenig, amd-gfx, dri-devel,
intel-gfx, intel-xe
Cc: alexander.deucher
On 22/01/2026 14:13, Arunpravin Paneer Selvam wrote:
>
> On 09/01/26 22:12, Matthew Auld wrote:
>> On 11/12/2025 12:23, Arunpravin Paneer Selvam wrote:
>>> Large alignment requests previously forced the allocator to search by
>>> alignment order, causing large free blocks to be split even when a
>>> smaller aligned range existed within them. This patch switches the
>>> search to prioritize the requested size and uses an augmented RB-tree
>>> field (subtree_max_alignment) to efficiently locate blocks that satisfy
>>> both size and alignment. This prevents unnecessary block splitting and
>>> significantly reduces fragmentation.
>>>
>>> A practical example is the VKCTS test
>>> dEQP-VK.memory.allocation.basic.size_8KiB.reverse.count_4000, which
>>> allocates 8 KiB buffers with a 256 KiB alignment. Previously, these
>>> requests caused the allocator to split large blocks despite having
>>> smaller aligned portions within them that could satisfy the allocation.
>>> The new design now identifies and allocates from these portions,
>>> avoiding unnecessary splitting.
>>>
>>> Signed-off-by: Arunpravin Paneer Selvam
>>> <Arunpravin.PaneerSelvam@amd.com>
>>> Suggested-by: Christian König <christian.koenig@amd.com>
>>> ---
>>> drivers/gpu/drm/drm_buddy.c | 205 +++++++++++++++++++++++++++++++++---
>>> include/drm/drm_buddy.h | 3 +
>>> 2 files changed, 191 insertions(+), 17 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
>>> index f2c92902e4a3..f749814bb270 100644
>>> --- a/drivers/gpu/drm/drm_buddy.c
>>> +++ b/drivers/gpu/drm/drm_buddy.c
>>> @@ -23,6 +23,18 @@ static struct kmem_cache *slab_blocks;
>>> #define for_each_free_tree(tree) \
>>> for ((tree) = 0; (tree) < DRM_BUDDY_MAX_FREE_TREES; (tree)++)
>>> +static unsigned int drm_buddy_min_offset_or_size_order(struct
>>> drm_buddy_block *block)
>>> +{
>>> + return min_t(unsigned int,
>>> + __ffs(drm_buddy_block_offset(block)),
>>> + drm_buddy_block_order(block));
>>
>> Didn't quite get this bit. Why do we pick the min between the order
>> and "alignment"? Say we have an order-zero block but it has 256K addr
>> alignment; this just selects zero? What is the idea here?
> Sorry for the confusion. I mixed up two concepts and I have sent the
> offset-alignment-only patch. Please have a look.
>>
>>> +}
>>> +
>>> +RB_DECLARE_CALLBACKS_MAX(static, drm_buddy_augment_cb,
>>> + struct drm_buddy_block, rb,
>>> + unsigned int, subtree_max_alignment,
>>> + drm_buddy_min_offset_or_size_order);
>>> +
>>> static struct drm_buddy_block *drm_block_alloc(struct drm_buddy *mm,
>>> struct drm_buddy_block *parent,
>>> unsigned int order,
>>> @@ -40,6 +52,9 @@ static struct drm_buddy_block
>>> *drm_block_alloc(struct drm_buddy *mm,
>>> block->header |= order;
>>> block->parent = parent;
>>> + block->subtree_max_alignment =
>>> + drm_buddy_min_offset_or_size_order(block);
>>> +
>>> RB_CLEAR_NODE(&block->rb);
>>> BUG_ON(block->header & DRM_BUDDY_HEADER_UNUSED);
>>> @@ -76,26 +91,32 @@ static bool rbtree_is_empty(struct rb_root *root)
>>> return RB_EMPTY_ROOT(root);
>>> }
>>> -static bool drm_buddy_block_offset_less(const struct
>>> drm_buddy_block *block,
>>> - const struct drm_buddy_block *node)
>>> -{
>>> - return drm_buddy_block_offset(block) <
>>> drm_buddy_block_offset(node);
>>> -}
>>> -
>>> -static bool rbtree_block_offset_less(struct rb_node *block,
>>> - const struct rb_node *node)
>>> -{
>>> - return drm_buddy_block_offset_less(rbtree_get_free_block(block),
>>> - rbtree_get_free_block(node));
>>> -}
>>> -
>>> static void rbtree_insert(struct drm_buddy *mm,
>>> struct drm_buddy_block *block,
>>> enum drm_buddy_free_tree tree)
>>> {
>>> - rb_add(&block->rb,
>>> - &mm->free_trees[tree][drm_buddy_block_order(block)],
>>> - rbtree_block_offset_less);
>>> + struct rb_node **link, *parent = NULL;
>>> + struct drm_buddy_block *node;
>>> + struct rb_root *root;
>>> + unsigned int order;
>>> +
>>> + order = drm_buddy_block_order(block);
>>> +
>>> + root = &mm->free_trees[tree][order];
>>> + link = &root->rb_node;
>>> +
>>> + while (*link) {
>>> + parent = *link;
>>> + node = rbtree_get_free_block(parent);
>>> +
>>> + if (drm_buddy_block_offset(block) <
>>> drm_buddy_block_offset(node))
>>> + link = &parent->rb_left;
>>> + else
>>> + link = &parent->rb_right;
>>
>> Is this correct? From the docs it sounds like we are meant to update
>> the max alignment for each subtree on the path leading up to the
>> insertion? It looks like insert_augmented() will only do it if there
>> is something to be rebalanced.
> AFAIU from the docs, rb_insert_augmented() updates the augmented value
> (subtree_max_alignment) for all nodes on the insertion path, not only
> when a rotation happens.
Unless I'm looking in the wrong place, the docs for insert_augmented():
"On insertion, the user must update the augmented information on the
path leading to the inserted node, then call rb_link_node() as usual and
rb_insert_augmented() instead of the usual rb_insert_color() call. If
rb_insert_augmented() rebalances the rbtree, it will callback into a
user provided function to update the augmented information on the
affected subtrees."
Plus the example code they give for the augmented case also matches
this, so I'm pretty sure we need it. See interval_tree_insert() [1]. Also,
if that is indeed true, then this is perhaps something to add to the kunit
tests to catch this case.
[1] https://docs.kernel.org/core-api/rbtree.html#sample-usage
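For illustration, a minimal sketch of what that on-path update could look like
in rbtree_insert(), modeled on interval_tree_insert(); the names are taken from
the patch and this is untested, an assumption rather than the final fix:

static void rbtree_insert(struct drm_buddy *mm,
			  struct drm_buddy_block *block,
			  enum drm_buddy_free_tree tree)
{
	unsigned int align = drm_buddy_min_offset_or_size_order(block);
	unsigned int order = drm_buddy_block_order(block);
	struct rb_root *root = &mm->free_trees[tree][order];
	struct rb_node **link = &root->rb_node, *parent = NULL;

	while (*link) {
		struct drm_buddy_block *node;

		parent = *link;
		node = rbtree_get_free_block(parent);

		/* Every node on the descent gains @block in its subtree,
		 * so refresh its cached maximum before linking. */
		if (node->subtree_max_alignment < align)
			node->subtree_max_alignment = align;

		if (drm_buddy_block_offset(block) < drm_buddy_block_offset(node))
			link = &parent->rb_left;
		else
			link = &parent->rb_right;
	}

	block->subtree_max_alignment = align;
	rb_link_node(&block->rb, parent, link);
	rb_insert_augmented(&block->rb, root, &drm_buddy_augment_cb);
}

The augmented value is propagated down the search path by hand, exactly as the
interval tree example does, and rb_insert_augmented() then only has to repair
the subtrees touched by rebalancing rotations.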
>>
>>> + }
>>> +
>>> + rb_link_node(&block->rb, parent, link);
>>> + rb_insert_augmented(&block->rb, root, &drm_buddy_augment_cb);
>>> }
>>> static void rbtree_remove(struct drm_buddy *mm,
>>> @@ -108,7 +129,7 @@ static void rbtree_remove(struct drm_buddy *mm,
>>> tree = get_block_tree(block);
>>> root = &mm->free_trees[tree][order];
>>> - rb_erase(&block->rb, root);
>>> + rb_erase_augmented(&block->rb, root, &drm_buddy_augment_cb);
>>> RB_CLEAR_NODE(&block->rb);
>>> }
>>> @@ -596,6 +617,88 @@ static bool block_incompatible(struct
>>> drm_buddy_block *block, unsigned int flags
>>> return needs_clear != drm_buddy_block_is_clear(block);
>>> }
>>> +static bool drm_buddy_subtree_can_satisfy(struct rb_node *node,
>>> + unsigned int alignment)
>>> +{
>>> + struct drm_buddy_block *block;
>>> +
>>> + if (!node)
>>> + return false;
>>> +
>>> + block = rbtree_get_free_block(node);
>>> + return block->subtree_max_alignment >= alignment;
>>> +}
>>> +
>>> +static struct drm_buddy_block *
>>> +drm_buddy_find_block_aligned(struct drm_buddy *mm,
>>> + enum drm_buddy_free_tree tree,
>>> + unsigned int order,
>>> + unsigned int tmp,
>>> + unsigned int alignment,
>>> + unsigned long flags)
>>> +{
>>> + struct rb_root *root = &mm->free_trees[tree][tmp];
>>> + struct rb_node *rb = root->rb_node;
>>> +
>>> + /* Try to find a block of the requested size that is already
>>> aligned */
>>> + while (rb) {
>>> + struct drm_buddy_block *block = rbtree_get_free_block(rb);
>>> + struct rb_node *left_node = rb->rb_left, *right_node = rb->rb_right;
>>> +
>>> + if (left_node) {
>>> + if (drm_buddy_subtree_can_satisfy(left_node, alignment)) {
>>> + rb = left_node;
>>> + continue;
>>> + }
>>> + }
>>> +
>>> + if (drm_buddy_block_order(block) >= order &&
>>> + __ffs(drm_buddy_block_offset(block)) >= alignment)
>>> + return block;
>>> +
>>> + if (right_node) {
>>> + if (drm_buddy_subtree_can_satisfy(right_node, alignment)) {
>>> + rb = right_node;
>>> + continue;
>>> + }
>>> + }
>>> +
>>> + break;
>>> + }
>>> +
>>> + if (tmp < max(order, alignment))
>>> + return NULL;
>>> +
>>> + /* If none found, look for a larger block that can satisfy the
>>> alignment */
>>
>> What is the idea here? IIUC we are looking at some specific order and
>> we want some min addr alignment; if the above can't find any subtree
>> with a suitable max alignment then we should bail and try the next
>> order? Why instead do we do the search again with the same alignment
>> below?
>
> Same as above, I mixed up two concepts. Please review v1 of the
> offset-aligned allocation patch.
>
> Regards,
>
> Arun.
>
>>
>>> + rb = root->rb_node;
>>> + while (rb) {
>>> + struct drm_buddy_block *block = rbtree_get_free_block(rb);
>>> + struct rb_node *left_node = rb->rb_left, *right_node = rb->rb_right;
>>> +
>>> + if (left_node) {
>>> + if (drm_buddy_subtree_can_satisfy(left_node, alignment)) {
>>> + rb = left_node;
>>> + continue;
>>> + }
>>> + }
>>> +
>>> + if (drm_buddy_block_order(block) >= max(order, alignment) &&
>>> + drm_buddy_min_offset_or_size_order(block) >= alignment)
>>> + return block;
>>> +
>>> + if (right_node) {
>>> + if (drm_buddy_subtree_can_satisfy(right_node, alignment)) {
>>> + rb = right_node;
>>> + continue;
>>> + }
>>> + }
>>> +
>>> + break;
>>> + }
>>> +
>>> + return NULL;
>>> +}
>>> +
>>> static struct drm_buddy_block *
>>> __alloc_range_bias(struct drm_buddy *mm,
>>> u64 start, u64 end,
>>> @@ -798,6 +901,69 @@ alloc_from_freetree(struct drm_buddy *mm,
>>> return ERR_PTR(err);
>>> }
>>> +static int drm_buddy_offset_aligned_allocation(struct drm_buddy *mm,
>>> + u64 size,
>>> + u64 min_block_size,
>>> + unsigned long flags,
>>> + struct list_head *blocks)
>>> +{
>>> + struct drm_buddy_block *block = NULL;
>>> + unsigned int order, tmp, alignment;
>>> + enum drm_buddy_free_tree tree;
>>> + unsigned long pages;
>>> +
>>> + alignment = ilog2(min_block_size);
>>> + pages = size >> ilog2(mm->chunk_size);
>>> + order = fls(pages) - 1;
>>> +
>>> + tree = (flags & DRM_BUDDY_CLEAR_ALLOCATION) ?
>>> + DRM_BUDDY_CLEAR_TREE : DRM_BUDDY_DIRTY_TREE;
>>> +
>>> + for (tmp = order; tmp <= mm->max_order; ++tmp) {
>>> + block = drm_buddy_find_block_aligned(mm, tree, order,
>>> + tmp, alignment, flags);
>>> + if (!block) {
>>> + tree = (tree == DRM_BUDDY_CLEAR_TREE) ?
>>> + DRM_BUDDY_DIRTY_TREE : DRM_BUDDY_CLEAR_TREE;
>>> + block = drm_buddy_find_block_aligned(mm, tree, order,
>>> + tmp, alignment, flags);
>>> + }
>>> +
>>> + if (block)
>>> + break;
>>> + }
>>> +
>>> + if (!block)
>>> + return -ENOSPC;
>>> +
>>> + while (drm_buddy_block_order(block) > order) {
>>> + unsigned int child_order = drm_buddy_block_order(block) - 1;
>>> + struct drm_buddy_block *left, *right;
>>> + int r;
>>> +
>>> + r = split_block(mm, block);
>>> + if (r)
>>> + return r;
>>> +
>>> + left = block->left;
>>> + right = block->right;
>>> +
>>> + if (child_order >= alignment)
>>> + block = right;
>>> + else
>>> + block = left;
>>> + }
>>> +
>>> + mark_allocated(mm, block);
>>> + mm->avail -= drm_buddy_block_size(mm, block);
>>> + if (drm_buddy_block_is_clear(block))
>>> + mm->clear_avail -= drm_buddy_block_size(mm, block);
>>> + kmemleak_update_trace(block);
>>> + list_add_tail(&block->link, blocks);
>>> +
>>> + return 0;
>>> +}
>>> +
>>> static int __alloc_range(struct drm_buddy *mm,
>>> struct list_head *dfs,
>>> u64 start, u64 size,
>>> @@ -1147,6 +1313,11 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>>> min_block_size = size;
>>> /* Align size value to min_block_size */
>>> } else if (!IS_ALIGNED(size, min_block_size)) {
>>> + if (min_block_size > size && is_power_of_2(size))
>>> + return drm_buddy_offset_aligned_allocation(mm, size,
>>> + min_block_size,
>>> + flags,
>>> + blocks);
>>> size = round_up(size, min_block_size);
>>> }
>>> diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
>>> index d7891d08f67a..da6a40fb4763 100644
>>> --- a/include/drm/drm_buddy.h
>>> +++ b/include/drm/drm_buddy.h
>>> @@ -11,6 +11,7 @@
>>> #include <linux/slab.h>
>>> #include <linux/sched.h>
>>> #include <linux/rbtree.h>
>>> +#include <linux/rbtree_augmented.h>
>>> #include <drm/drm_print.h>
>>> @@ -60,6 +61,8 @@ struct drm_buddy_block {
>>> };
>>> struct list_head tmp_link;
>>> +
>>> + unsigned int subtree_max_alignment;
>>> };
>>> /* Order-zero must be at least SZ_4K */
>>
* Re: [RFC PATCH] drm/buddy: Optimize large alignment handling to avoid unnecessary splits
2026-01-22 15:02 ` Matthew Auld
@ 2026-01-29 13:19 ` Arunpravin Paneer Selvam
0 siblings, 0 replies; 6+ messages in thread
From: Arunpravin Paneer Selvam @ 2026-01-29 13:19 UTC (permalink / raw)
To: Matthew Auld, christian.koenig, amd-gfx, dri-devel, intel-gfx,
intel-xe
Cc: alexander.deucher
Hi Matthew,
On 22/01/26 20:32, Matthew Auld wrote:
> On 22/01/2026 14:13, Arunpravin Paneer Selvam wrote:
>>
>> On 09/01/26 22:12, Matthew Auld wrote:
>>> On 11/12/2025 12:23, Arunpravin Paneer Selvam wrote:
>>>> Large alignment requests previously forced the allocator to search by
>>>> alignment order, causing large free blocks to be split even when a
>>>> smaller aligned range existed within them. This patch switches the
>>>> search to prioritize the requested size and uses an augmented RB-tree
>>>> field (subtree_max_alignment) to efficiently locate blocks that
>>>> satisfy
>>>> both size and alignment. This prevents unnecessary block splitting and
>>>> significantly reduces fragmentation.
>>>>
>>>> A practical example is the VKCTS test
>>>> dEQP-VK.memory.allocation.basic.size_8KiB.reverse.count_4000, which
>>>> allocates 8 KiB buffers with a 256 KiB alignment. Previously, these
>>>> requests caused the allocator to split large blocks despite having
>>>> smaller aligned portions within them that could satisfy the
>>>> allocation.
>>>> The new design now identifies and allocates from these portions,
>>>> avoiding unnecessary splitting.
>>>>
>>>> Signed-off-by: Arunpravin Paneer Selvam
>>>> <Arunpravin.PaneerSelvam@amd.com>
>>>> Suggested-by: Christian König <christian.koenig@amd.com>
>>>> ---
>>>> drivers/gpu/drm/drm_buddy.c | 205
>>>> +++++++++++++++++++++++++++++++++---
>>>> include/drm/drm_buddy.h | 3 +
>>>> 2 files changed, 191 insertions(+), 17 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
>>>> index f2c92902e4a3..f749814bb270 100644
>>>> --- a/drivers/gpu/drm/drm_buddy.c
>>>> +++ b/drivers/gpu/drm/drm_buddy.c
>>>> @@ -23,6 +23,18 @@ static struct kmem_cache *slab_blocks;
>>>> #define for_each_free_tree(tree) \
>>>> for ((tree) = 0; (tree) < DRM_BUDDY_MAX_FREE_TREES; (tree)++)
>>>> +static unsigned int drm_buddy_min_offset_or_size_order(struct
>>>> drm_buddy_block *block)
>>>> +{
>>>> + return min_t(unsigned int,
>>>> + __ffs(drm_buddy_block_offset(block)),
>>>> + drm_buddy_block_order(block));
>>>
>>> Didn't quite get this bit. Why do we pick the min between the order
>>> and "alignment"? Say we have an order-zero block but it has 256K addr
>>> alignment; this just selects zero? What is the idea here?
>> Sorry for the confusion. I mixed up two concepts and I have sent the
>> offset-alignment-only patch. Please have a look.
>>>
>>>> +}
>>>> +
>>>> +RB_DECLARE_CALLBACKS_MAX(static, drm_buddy_augment_cb,
>>>> + struct drm_buddy_block, rb,
>>>> + unsigned int, subtree_max_alignment,
>>>> + drm_buddy_min_offset_or_size_order);
>>>> +
>>>> static struct drm_buddy_block *drm_block_alloc(struct drm_buddy *mm,
>>>> struct drm_buddy_block *parent,
>>>> unsigned int order,
>>>> @@ -40,6 +52,9 @@ static struct drm_buddy_block
>>>> *drm_block_alloc(struct drm_buddy *mm,
>>>> block->header |= order;
>>>> block->parent = parent;
>>>> + block->subtree_max_alignment =
>>>> + drm_buddy_min_offset_or_size_order(block);
>>>> +
>>>> RB_CLEAR_NODE(&block->rb);
>>>> BUG_ON(block->header & DRM_BUDDY_HEADER_UNUSED);
>>>> @@ -76,26 +91,32 @@ static bool rbtree_is_empty(struct rb_root *root)
>>>> return RB_EMPTY_ROOT(root);
>>>> }
>>>> -static bool drm_buddy_block_offset_less(const struct
>>>> drm_buddy_block *block,
>>>> - const struct drm_buddy_block *node)
>>>> -{
>>>> - return drm_buddy_block_offset(block) <
>>>> drm_buddy_block_offset(node);
>>>> -}
>>>> -
>>>> -static bool rbtree_block_offset_less(struct rb_node *block,
>>>> - const struct rb_node *node)
>>>> -{
>>>> - return drm_buddy_block_offset_less(rbtree_get_free_block(block),
>>>> - rbtree_get_free_block(node));
>>>> -}
>>>> -
>>>> static void rbtree_insert(struct drm_buddy *mm,
>>>> struct drm_buddy_block *block,
>>>> enum drm_buddy_free_tree tree)
>>>> {
>>>> - rb_add(&block->rb,
>>>> - &mm->free_trees[tree][drm_buddy_block_order(block)],
>>>> - rbtree_block_offset_less);
>>>> + struct rb_node **link, *parent = NULL;
>>>> + struct drm_buddy_block *node;
>>>> + struct rb_root *root;
>>>> + unsigned int order;
>>>> +
>>>> + order = drm_buddy_block_order(block);
>>>> +
>>>> + root = &mm->free_trees[tree][order];
>>>> + link = &root->rb_node;
>>>> +
>>>> + while (*link) {
>>>> + parent = *link;
>>>> + node = rbtree_get_free_block(parent);
>>>> +
>>>> + if (drm_buddy_block_offset(block) <
>>>> drm_buddy_block_offset(node))
>>>> + link = &parent->rb_left;
>>>> + else
>>>> + link = &parent->rb_right;
>>>
>>> Is this correct? From the docs it sounds like we are meant to update
>>> the max alignment for each subtree on the path leading up to the
>>> insertion? It looks like insert_augmented() will only do it if there
>>> is something to be rebalanced.
>> AFAIU from the docs, rb_insert_augmented() updates the augmented
>> value (subtree_max_alignment) for all nodes on the insertion path,
>> not only when a rotation happens.
>
> Unless I'm looking in the wrong place, the docs for insert_augmented():
>
> "On insertion, the user must update the augmented information on the
> path leading to the inserted node, then call rb_link_node() as usual
> and rb_insert_augmented() instead of the usual rb_insert_color() call.
> If rb_insert_augmented() rebalances the rbtree, it will callback into
> a user provided function to update the augmented information on the
> affected subtrees."
>
> Plus the example code they give for the augmented case also matches
> this, so I'm pretty sure we need it. See interval_tree_insert() [1]. Also,
> if that is indeed true, then this is perhaps something to add to the kunit
> tests to catch this case.
>
> [1] https://docs.kernel.org/core-api/rbtree.html#sample-usage
You are right. I am debugging the augmentation update path and working
on the fixes.
Thanks,
Arun.
>
>
>>>
>>>> + }
>>>> +
>>>> + rb_link_node(&block->rb, parent, link);
>>>> + rb_insert_augmented(&block->rb, root, &drm_buddy_augment_cb);
>>>> }
>>>> static void rbtree_remove(struct drm_buddy *mm,
>>>> @@ -108,7 +129,7 @@ static void rbtree_remove(struct drm_buddy *mm,
>>>> tree = get_block_tree(block);
>>>> root = &mm->free_trees[tree][order];
>>>> - rb_erase(&block->rb, root);
>>>> + rb_erase_augmented(&block->rb, root, &drm_buddy_augment_cb);
>>>> RB_CLEAR_NODE(&block->rb);
>>>> }
>>>> @@ -596,6 +617,88 @@ static bool block_incompatible(struct
>>>> drm_buddy_block *block, unsigned int flags
>>>> return needs_clear != drm_buddy_block_is_clear(block);
>>>> }
>>>> +static bool drm_buddy_subtree_can_satisfy(struct rb_node *node,
>>>> + unsigned int alignment)
>>>> +{
>>>> + struct drm_buddy_block *block;
>>>> +
>>>> + if (!node)
>>>> + return false;
>>>> +
>>>> + block = rbtree_get_free_block(node);
>>>> + return block->subtree_max_alignment >= alignment;
>>>> +}
>>>> +
>>>> +static struct drm_buddy_block *
>>>> +drm_buddy_find_block_aligned(struct drm_buddy *mm,
>>>> + enum drm_buddy_free_tree tree,
>>>> + unsigned int order,
>>>> + unsigned int tmp,
>>>> + unsigned int alignment,
>>>> + unsigned long flags)
>>>> +{
>>>> + struct rb_root *root = &mm->free_trees[tree][tmp];
>>>> + struct rb_node *rb = root->rb_node;
>>>> +
>>>> + /* Try to find a block of the requested size that is already
>>>> aligned */
>>>> + while (rb) {
>>>> + struct drm_buddy_block *block = rbtree_get_free_block(rb);
>>>> + struct rb_node *left_node = rb->rb_left, *right_node = rb->rb_right;
>>>> +
>>>> + if (left_node) {
>>>> + if (drm_buddy_subtree_can_satisfy(left_node,
>>>> alignment)) {
>>>> + rb = left_node;
>>>> + continue;
>>>> + }
>>>> + }
>>>> +
>>>> + if (drm_buddy_block_order(block) >= order &&
>>>> + __ffs(drm_buddy_block_offset(block)) >= alignment)
>>>> + return block;
>>>> +
>>>> + if (right_node) {
>>>> + if (drm_buddy_subtree_can_satisfy(right_node,
>>>> alignment)) {
>>>> + rb = right_node;
>>>> + continue;
>>>> + }
>>>> + }
>>>> +
>>>> + break;
>>>> + }
>>>> +
>>>> + if (tmp < max(order, alignment))
>>>> + return NULL;
>>>> +
>>>> + /* If none found, look for a larger block that can satisfy the
>>>> alignment */
>>>
>>> What is the idea here? IIUC we are looking at some specific order
>>> and we want some min addr alignment; if the above can't find any
>>> subtree with a suitable max alignment then we should bail and try the
>>> next order? Why instead do we do the search again with the same
>>> alignment below?
>>
>> Same as above, I mixed up two concepts. Please review v1 of the
>> offset-aligned allocation patch.
>>
>> Regards,
>>
>> Arun.
>>
>>>
>>>> + rb = root->rb_node;
>>>> + while (rb) {
>>>> + struct drm_buddy_block *block = rbtree_get_free_block(rb);
>>>> + struct rb_node *left_node = rb->rb_left, *right_node = rb->rb_right;
>>>> +
>>>> + if (left_node) {
>>>> + if (drm_buddy_subtree_can_satisfy(left_node,
>>>> alignment)) {
>>>> + rb = left_node;
>>>> + continue;
>>>> + }
>>>> + }
>>>> +
>>>> + if (drm_buddy_block_order(block) >= max(order, alignment) &&
>>>> + drm_buddy_min_offset_or_size_order(block) >= alignment)
>>>> + return block;
>>>> +
>>>> + if (right_node) {
>>>> + if (drm_buddy_subtree_can_satisfy(right_node,
>>>> alignment)) {
>>>> + rb = right_node;
>>>> + continue;
>>>> + }
>>>> + }
>>>> +
>>>> + break;
>>>> + }
>>>> +
>>>> + return NULL;
>>>> +}
>>>> +
>>>> static struct drm_buddy_block *
>>>> __alloc_range_bias(struct drm_buddy *mm,
>>>> u64 start, u64 end,
>>>> @@ -798,6 +901,69 @@ alloc_from_freetree(struct drm_buddy *mm,
>>>> return ERR_PTR(err);
>>>> }
>>>> +static int drm_buddy_offset_aligned_allocation(struct drm_buddy
>>>> *mm,
>>>> + u64 size,
>>>> + u64 min_block_size,
>>>> + unsigned long flags,
>>>> + struct list_head *blocks)
>>>> +{
>>>> + struct drm_buddy_block *block = NULL;
>>>> + unsigned int order, tmp, alignment;
>>>> + enum drm_buddy_free_tree tree;
>>>> + unsigned long pages;
>>>> +
>>>> + alignment = ilog2(min_block_size);
>>>> + pages = size >> ilog2(mm->chunk_size);
>>>> + order = fls(pages) - 1;
>>>> +
>>>> + tree = (flags & DRM_BUDDY_CLEAR_ALLOCATION) ?
>>>> + DRM_BUDDY_CLEAR_TREE : DRM_BUDDY_DIRTY_TREE;
>>>> +
>>>> + for (tmp = order; tmp <= mm->max_order; ++tmp) {
>>>> + block = drm_buddy_find_block_aligned(mm, tree, order,
>>>> + tmp, alignment, flags);
>>>> + if (!block) {
>>>> + tree = (tree == DRM_BUDDY_CLEAR_TREE) ?
>>>> + DRM_BUDDY_DIRTY_TREE : DRM_BUDDY_CLEAR_TREE;
>>>> + block = drm_buddy_find_block_aligned(mm, tree, order,
>>>> + tmp, alignment, flags);
>>>> + }
>>>> +
>>>> + if (block)
>>>> + break;
>>>> + }
>>>> +
>>>> + if (!block)
>>>> + return -ENOSPC;
>>>> +
>>>> + while (drm_buddy_block_order(block) > order) {
>>>> + unsigned int child_order = drm_buddy_block_order(block) - 1;
>>>> + struct drm_buddy_block *left, *right;
>>>> + int r;
>>>> +
>>>> + r = split_block(mm, block);
>>>> + if (r)
>>>> + return r;
>>>> +
>>>> + left = block->left;
>>>> + right = block->right;
>>>> +
>>>> + if (child_order >= alignment)
>>>> + block = right;
>>>> + else
>>>> + block = left;
>>>> + }
>>>> +
>>>> + mark_allocated(mm, block);
>>>> + mm->avail -= drm_buddy_block_size(mm, block);
>>>> + if (drm_buddy_block_is_clear(block))
>>>> + mm->clear_avail -= drm_buddy_block_size(mm, block);
>>>> + kmemleak_update_trace(block);
>>>> + list_add_tail(&block->link, blocks);
>>>> +
>>>> + return 0;
>>>> +}
>>>> +
>>>> static int __alloc_range(struct drm_buddy *mm,
>>>> struct list_head *dfs,
>>>> u64 start, u64 size,
>>>> @@ -1147,6 +1313,11 @@ int drm_buddy_alloc_blocks(struct drm_buddy
>>>> *mm,
>>>> min_block_size = size;
>>>> /* Align size value to min_block_size */
>>>> } else if (!IS_ALIGNED(size, min_block_size)) {
>>>> + if (min_block_size > size && is_power_of_2(size))
>>>> + return drm_buddy_offset_aligned_allocation(mm, size,
>>>> + min_block_size,
>>>> + flags,
>>>> + blocks);
>>>> size = round_up(size, min_block_size);
>>>> }
>>>> diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
>>>> index d7891d08f67a..da6a40fb4763 100644
>>>> --- a/include/drm/drm_buddy.h
>>>> +++ b/include/drm/drm_buddy.h
>>>> @@ -11,6 +11,7 @@
>>>> #include <linux/slab.h>
>>>> #include <linux/sched.h>
>>>> #include <linux/rbtree.h>
>>>> +#include <linux/rbtree_augmented.h>
>>>> #include <drm/drm_print.h>
>>>> @@ -60,6 +61,8 @@ struct drm_buddy_block {
>>>> };
>>>> struct list_head tmp_link;
>>>> +
>>>> + unsigned int subtree_max_alignment;
>>>> };
>>>> /* Order-zero must be at least SZ_4K */
>>>
>