From: Matthew Auld <matthew.auld@intel.com>
To: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>,
christian.koenig@amd.com, amd-gfx@lists.freedesktop.org,
dri-devel@lists.freedesktop.org, intel-gfx@lists.freedesktop.org,
intel-xe@lists.freedesktop.org
Cc: alexander.deucher@amd.com
Subject: Re: [RFC PATCH] drm/buddy: Optimize large alignment handling to avoid unnecessary splits
Date: Fri, 9 Jan 2026 16:42:11 +0000 [thread overview]
Message-ID: <b2aa28aa-ce9c-4948-9bed-289700f4eb4a@intel.com> (raw)
In-Reply-To: <20251211122319.2054-1-Arunpravin.PaneerSelvam@amd.com>
On 11/12/2025 12:23, Arunpravin Paneer Selvam wrote:
> Large alignment requests previously forced the allocator to search by
> alignment order, causing large free blocks to be split even when a
> smaller aligned range existed within them. This patch switches the
> search to prioritize the requested size and uses an augmented RB-tree
> field (subtree_max_alignment) to efficiently locate blocks that satisfy
> both size and alignment. This prevents unnecessary block splitting and
> significantly reduces fragmentation.
>
> A practical example is the VKCTS test
> dEQP-VK.memory.allocation.basic.size_8KiB.reverse.count_4000, which
> allocates 8 KiB buffers with a 256 KiB alignment. Previously, these
> requests caused the allocator to split large blocks despite having
> smaller aligned portions within them that could satisfy the allocation.
> The new design now identifies and allocates from these portions,
> avoiding unnecessary splitting.
>
> Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
> Suggested-by: Christian König <christian.koenig@amd.com>
> ---
> drivers/gpu/drm/drm_buddy.c | 205 +++++++++++++++++++++++++++++++++---
> include/drm/drm_buddy.h | 3 +
> 2 files changed, 191 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
> index f2c92902e4a3..f749814bb270 100644
> --- a/drivers/gpu/drm/drm_buddy.c
> +++ b/drivers/gpu/drm/drm_buddy.c
> @@ -23,6 +23,18 @@ static struct kmem_cache *slab_blocks;
> #define for_each_free_tree(tree) \
> for ((tree) = 0; (tree) < DRM_BUDDY_MAX_FREE_TREES; (tree)++)
>
> +static unsigned int drm_buddy_min_offset_or_size_order(struct drm_buddy_block *block)
> +{
> + return min_t(unsigned int,
> + __ffs(drm_buddy_block_offset(block)),
> + drm_buddy_block_order(block));
Didn't quite get this bit. Why do we pick the min between the order and
the offset "alignment"? Say we have an order-zero block but it has 256K
addr alignment — this just selects zero? What is the idea here?
> +}
> +
> +RB_DECLARE_CALLBACKS_MAX(static, drm_buddy_augment_cb,
> + struct drm_buddy_block, rb,
> + unsigned int, subtree_max_alignment,
> + drm_buddy_min_offset_or_size_order);
> +
> static struct drm_buddy_block *drm_block_alloc(struct drm_buddy *mm,
> struct drm_buddy_block *parent,
> unsigned int order,
> @@ -40,6 +52,9 @@ static struct drm_buddy_block *drm_block_alloc(struct drm_buddy *mm,
> block->header |= order;
> block->parent = parent;
>
> + block->subtree_max_alignment =
> + drm_buddy_min_offset_or_size_order(block);
> +
> RB_CLEAR_NODE(&block->rb);
>
> BUG_ON(block->header & DRM_BUDDY_HEADER_UNUSED);
> @@ -76,26 +91,32 @@ static bool rbtree_is_empty(struct rb_root *root)
> return RB_EMPTY_ROOT(root);
> }
>
> -static bool drm_buddy_block_offset_less(const struct drm_buddy_block *block,
> - const struct drm_buddy_block *node)
> -{
> - return drm_buddy_block_offset(block) < drm_buddy_block_offset(node);
> -}
> -
> -static bool rbtree_block_offset_less(struct rb_node *block,
> - const struct rb_node *node)
> -{
> - return drm_buddy_block_offset_less(rbtree_get_free_block(block),
> - rbtree_get_free_block(node));
> -}
> -
> static void rbtree_insert(struct drm_buddy *mm,
> struct drm_buddy_block *block,
> enum drm_buddy_free_tree tree)
> {
> - rb_add(&block->rb,
> - &mm->free_trees[tree][drm_buddy_block_order(block)],
> - rbtree_block_offset_less);
> + struct rb_node **link, *parent = NULL;
> + struct drm_buddy_block *node;
> + struct rb_root *root;
> + unsigned int order;
> +
> + order = drm_buddy_block_order(block);
> +
> + root = &mm->free_trees[tree][order];
> + link = &root->rb_node;
> +
> + while (*link) {
> + parent = *link;
> + node = rbtree_get_free_block(parent);
> +
> + if (drm_buddy_block_offset(block) < drm_buddy_block_offset(node))
> + link = &parent->rb_left;
> + else
> + link = &parent->rb_right;
Is this correct? From the docs it sounds like we are meant to update the
max alignment for each subtree on the path leading down to the insertion
point? It looks like rb_insert_augmented will only do that if there is
something to be rebalanced.
> + }
> +
> + rb_link_node(&block->rb, parent, link);
> + rb_insert_augmented(&block->rb, root, &drm_buddy_augment_cb);
> }
>
> static void rbtree_remove(struct drm_buddy *mm,
> @@ -108,7 +129,7 @@ static void rbtree_remove(struct drm_buddy *mm,
> tree = get_block_tree(block);
> root = &mm->free_trees[tree][order];
>
> - rb_erase(&block->rb, root);
> + rb_erase_augmented(&block->rb, root, &drm_buddy_augment_cb);
> RB_CLEAR_NODE(&block->rb);
> }
>
> @@ -596,6 +617,88 @@ static bool block_incompatible(struct drm_buddy_block *block, unsigned int flags
> return needs_clear != drm_buddy_block_is_clear(block);
> }
>
> +static bool drm_buddy_subtree_can_satisfy(struct rb_node *node,
> + unsigned int alignment)
> +{
> + struct drm_buddy_block *block;
> +
> + if (!node)
> + return false;
> +
> + block = rbtree_get_free_block(node);
> + return block->subtree_max_alignment >= alignment;
> +}
> +
> +static struct drm_buddy_block *
> +drm_buddy_find_block_aligned(struct drm_buddy *mm,
> + enum drm_buddy_free_tree tree,
> + unsigned int order,
> + unsigned int tmp,
> + unsigned int alignment,
> + unsigned long flags)
> +{
> + struct rb_root *root = &mm->free_trees[tree][tmp];
> + struct rb_node *rb = root->rb_node;
> +
> + /* Try to find a block of the requested size that is already aligned */
> + while (rb) {
> + struct drm_buddy_block *block = rbtree_get_free_block(rb);
> + struct rb_node *left_node = rb->rb_left, *right_node = rb->rb_right;
> +
> + if (left_node) {
> + if (drm_buddy_subtree_can_satisfy(left_node, alignment)) {
> + rb = left_node;
> + continue;
> + }
> + }
> +
> + if (drm_buddy_block_order(block) >= order &&
> + __ffs(drm_buddy_block_offset(block)) >= alignment)
> + return block;
> +
> + if (right_node) {
> + if (drm_buddy_subtree_can_satisfy(right_node, alignment)) {
> + rb = right_node;
> + continue;
> + }
> + }
> +
> + break;
> + }
> +
> + if (tmp < max(order, alignment))
> + return NULL;
> +
> + /* If none found, look for a larger block that can satisfy the alignment */
What is the idea here? IIUC we are looking at some specific order and we
want some min addr alignment, if the above can't find any subtree with
suitable max alignment then we should bail and try the next order? Why
instead do we do the search again with the same alignment below?
> + rb = root->rb_node;
> + while (rb) {
> + struct drm_buddy_block *block = rbtree_get_free_block(rb);
> + struct rb_node *left_node = rb->rb_left, *right_node = rb->rb_right;
> +
> + if (left_node) {
> + if (drm_buddy_subtree_can_satisfy(left_node, alignment)) {
> + rb = left_node;
> + continue;
> + }
> + }
> +
> + if (drm_buddy_block_order(block) >= max(order, alignment) &&
> + drm_buddy_min_offset_or_size_order(block) >= alignment)
> + return block;
> +
> + if (right_node) {
> + if (drm_buddy_subtree_can_satisfy(right_node, alignment)) {
> + rb = right_node;
> + continue;
> + }
> + }
> +
> + break;
> + }
> +
> + return NULL;
> +}
> +
> static struct drm_buddy_block *
> __alloc_range_bias(struct drm_buddy *mm,
> u64 start, u64 end,
> @@ -798,6 +901,69 @@ alloc_from_freetree(struct drm_buddy *mm,
> return ERR_PTR(err);
> }
>
> +static int drm_buddy_offset_aligned_allocation(struct drm_buddy *mm,
> + u64 size,
> + u64 min_block_size,
> + unsigned long flags,
> + struct list_head *blocks)
> +{
> + struct drm_buddy_block *block = NULL;
> + unsigned int order, tmp, alignment;
> + enum drm_buddy_free_tree tree;
> + unsigned long pages;
> +
> + alignment = ilog2(min_block_size);
> + pages = size >> ilog2(mm->chunk_size);
> + order = fls(pages) - 1;
> +
> + tree = (flags & DRM_BUDDY_CLEAR_ALLOCATION) ?
> + DRM_BUDDY_CLEAR_TREE : DRM_BUDDY_DIRTY_TREE;
> +
> + for (tmp = order; tmp <= mm->max_order; ++tmp) {
> + block = drm_buddy_find_block_aligned(mm, tree, order,
> + tmp, alignment, flags);
> + if (!block) {
> + tree = (tree == DRM_BUDDY_CLEAR_TREE) ?
> + DRM_BUDDY_DIRTY_TREE : DRM_BUDDY_CLEAR_TREE;
> + block = drm_buddy_find_block_aligned(mm, tree, order,
> + tmp, alignment, flags);
> + }
> +
> + if (block)
> + break;
> + }
> +
> + if (!block)
> + return -ENOSPC;
> +
> + while (drm_buddy_block_order(block) > order) {
> + unsigned int child_order = drm_buddy_block_order(block) - 1;
> + struct drm_buddy_block *left, *right;
> + int r;
> +
> + r = split_block(mm, block);
> + if (r)
> + return r;
> +
> + left = block->left;
> + right = block->right;
> +
> + if (child_order >= alignment)
> + block = right;
> + else
> + block = left;
> + }
> +
> + mark_allocated(mm, block);
> + mm->avail -= drm_buddy_block_size(mm, block);
> + if (drm_buddy_block_is_clear(block))
> + mm->clear_avail -= drm_buddy_block_size(mm, block);
> + kmemleak_update_trace(block);
> + list_add_tail(&block->link, blocks);
> +
> + return 0;
> +}
> +
> static int __alloc_range(struct drm_buddy *mm,
> struct list_head *dfs,
> u64 start, u64 size,
> @@ -1147,6 +1313,11 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
> min_block_size = size;
> /* Align size value to min_block_size */
> } else if (!IS_ALIGNED(size, min_block_size)) {
> + if (min_block_size > size && is_power_of_2(size))
> + return drm_buddy_offset_aligned_allocation(mm, size,
> + min_block_size,
> + flags,
> + blocks);
> size = round_up(size, min_block_size);
> }
>
> diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
> index d7891d08f67a..da6a40fb4763 100644
> --- a/include/drm/drm_buddy.h
> +++ b/include/drm/drm_buddy.h
> @@ -11,6 +11,7 @@
> #include <linux/slab.h>
> #include <linux/sched.h>
> #include <linux/rbtree.h>
> +#include <linux/rbtree_augmented.h>
>
> #include <drm/drm_print.h>
>
> @@ -60,6 +61,8 @@ struct drm_buddy_block {
> };
>
> struct list_head tmp_link;
> +
> + unsigned int subtree_max_alignment;
> };
>
> /* Order-zero must be at least SZ_4K */
next prev parent reply other threads:[~2026-01-09 16:42 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-12-11 12:23 [RFC PATCH] drm/buddy: Optimize large alignment handling to avoid unnecessary splits Arunpravin Paneer Selvam
2025-12-11 13:24 ` ✓ i915.CI.BAT: success for " Patchwork
2026-01-09 16:42 ` Matthew Auld [this message]
2026-01-22 14:13 ` [RFC PATCH] " Arunpravin Paneer Selvam
2026-01-22 15:02 ` Matthew Auld
2026-01-29 13:19 ` Arunpravin Paneer Selvam
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=b2aa28aa-ce9c-4948-9bed-289700f4eb4a@intel.com \
--to=matthew.auld@intel.com \
--cc=Arunpravin.PaneerSelvam@amd.com \
--cc=alexander.deucher@amd.com \
--cc=amd-gfx@lists.freedesktop.org \
--cc=christian.koenig@amd.com \
--cc=dri-devel@lists.freedesktop.org \
--cc=intel-gfx@lists.freedesktop.org \
--cc=intel-xe@lists.freedesktop.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox