* [PATCH 0/2] gpu/buddy: Per-order free and used block scoreboards
@ 2026-05-04 13:52 Francois Dugast
2026-05-04 13:52 ` [PATCH 1/2] gpu/buddy: Track per-order free blocks with a scoreboard Francois Dugast
2026-05-04 13:52 ` [PATCH 2/2] gpu/buddy: Track per-order used " Francois Dugast
0 siblings, 2 replies; 5+ messages in thread
From: Francois Dugast @ 2026-05-04 13:52 UTC (permalink / raw)
To: dri-devel; +Cc: intel-xe, matthew.auld, Francois Dugast
drm_buddy_print() currently reports per-order free block counts by
walking all rbtrees, which is O(n) in the total number of free blocks
and holds the allocator lock for the duration. On large VRAM heaps with
many small fragments this becomes expensive.
This series replaces the rbtree walk with two lightweight scoreboard
arrays — free_scoreboard and used_scoreboard — indexed by order and
maintained incrementally at the points where block state transitions
occur. The print functions become simple array lookups, and users
reading debugfs (/sys/kernel/debug/dri/0/tile0/vram_mm) now get both
free and used counts for each order at O(1) cost.
Francois Dugast (2):
gpu/buddy: Track per-order free blocks with a scoreboard
gpu/buddy: Track per-order used blocks with a scoreboard
drivers/gpu/buddy.c | 60 +++++++++++++++++++++++++------------
drivers/gpu/drm/drm_buddy.c | 20 ++++---------
include/linux/gpu_buddy.h | 15 ++++++++++
3 files changed, 61 insertions(+), 34 deletions(-)
--
2.43.0
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH 1/2] gpu/buddy: Track per-order free blocks with a scoreboard
2026-05-04 13:52 [PATCH 0/2] gpu/buddy: Per-order free and used block scoreboards Francois Dugast
@ 2026-05-04 13:52 ` Francois Dugast
2026-05-08 14:09 ` Matthew Auld
2026-05-04 13:52 ` [PATCH 2/2] gpu/buddy: Track per-order used " Francois Dugast
1 sibling, 1 reply; 5+ messages in thread
From: Francois Dugast @ 2026-05-04 13:52 UTC (permalink / raw)
To: dri-devel; +Cc: intel-xe, matthew.auld, Francois Dugast
Reporting per-order free block counts in drm_buddy_print() currently
requires walking all rbtrees, which is O(n) over the total number of
free blocks and holds the allocator lock for the duration. This becomes
expensive on large VRAM heaps with many small free fragments.
Maintain a free_scoreboard[] array indexed by order instead, so that
the count for any order is always available in O(1). The scoreboard is
kept accurate by hooking into every place where a block's free state
changes: mark_free(), mark_allocated(), mark_split(), and the two sites
in __gpu_buddy_free() and __force_merge() that call rbtree_remove()
directly on free blocks without going through mark_*().
The print functions are simplified as a result: the rbtree traversal
is replaced by a direct array lookup.
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Assisted-by: GitHub Copilot:claude-sonnet-4.6
---
drivers/gpu/buddy.c | 35 ++++++++++++++++++++---------------
drivers/gpu/drm/drm_buddy.c | 16 ++--------------
include/linux/gpu_buddy.h | 7 +++++++
3 files changed, 29 insertions(+), 29 deletions(-)
diff --git a/drivers/gpu/buddy.c b/drivers/gpu/buddy.c
index 52686672e99f..d831165e87ea 100644
--- a/drivers/gpu/buddy.c
+++ b/drivers/gpu/buddy.c
@@ -193,6 +193,8 @@ static void mark_allocated(struct gpu_buddy *mm,
block->header &= ~GPU_BUDDY_HEADER_STATE;
block->header |= GPU_BUDDY_ALLOCATED;
+ mm->free_scoreboard[gpu_buddy_block_order(block)]--;
+
rbtree_remove(mm, block);
}
@@ -204,6 +206,8 @@ static void mark_free(struct gpu_buddy *mm,
block->header &= ~GPU_BUDDY_HEADER_STATE;
block->header |= GPU_BUDDY_FREE;
+ mm->free_scoreboard[gpu_buddy_block_order(block)]++;
+
tree = get_block_tree(block);
rbtree_insert(mm, block, tree);
}
@@ -214,6 +218,8 @@ static void mark_split(struct gpu_buddy *mm,
block->header &= ~GPU_BUDDY_HEADER_STATE;
block->header |= GPU_BUDDY_SPLIT;
+ mm->free_scoreboard[gpu_buddy_block_order(block)]--;
+
rbtree_remove(mm, block);
}
@@ -271,6 +277,7 @@ static unsigned int __gpu_buddy_free(struct gpu_buddy *mm,
}
rbtree_remove(mm, buddy);
+ mm->free_scoreboard[gpu_buddy_block_order(buddy)]--;
if (force_merge && gpu_buddy_block_is_clear(buddy))
mm->clear_avail -= gpu_buddy_block_size(mm, buddy);
@@ -335,6 +342,7 @@ static int __force_merge(struct gpu_buddy *mm,
iter = rb_prev(iter);
rbtree_remove(mm, block);
+ mm->free_scoreboard[gpu_buddy_block_order(block)]--;
if (gpu_buddy_block_is_clear(block))
mm->clear_avail -= gpu_buddy_block_size(mm, block);
@@ -384,11 +392,17 @@ int gpu_buddy_init(struct gpu_buddy *mm, u64 size, u64 chunk_size)
BUG_ON(mm->max_order > GPU_BUDDY_MAX_ORDER);
+ mm->free_scoreboard = kcalloc(mm->max_order + 1,
+ sizeof(*mm->free_scoreboard),
+ GFP_KERNEL);
+ if (!mm->free_scoreboard)
+ return -ENOMEM;
+
mm->free_trees = kmalloc_array(GPU_BUDDY_MAX_FREE_TREES,
sizeof(*mm->free_trees),
GFP_KERNEL);
if (!mm->free_trees)
- return -ENOMEM;
+ goto out_free_scoreboard;
for_each_free_tree(i) {
mm->free_trees[i] = kmalloc_array(mm->max_order + 1,
@@ -447,6 +461,8 @@ int gpu_buddy_init(struct gpu_buddy *mm, u64 size, u64 chunk_size)
while (i--)
kfree(mm->free_trees[i]);
kfree(mm->free_trees);
+out_free_scoreboard:
+ kfree(mm->free_scoreboard);
return -ENOMEM;
}
EXPORT_SYMBOL(gpu_buddy_init);
@@ -485,6 +501,7 @@ void gpu_buddy_fini(struct gpu_buddy *mm)
kfree(mm->free_trees[i]);
kfree(mm->free_trees);
kfree(mm->roots);
+ kfree(mm->free_scoreboard);
}
EXPORT_SYMBOL(gpu_buddy_fini);
@@ -1479,21 +1496,9 @@ void gpu_buddy_print(struct gpu_buddy *mm)
mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20, mm->clear_avail >> 20);
for (order = mm->max_order; order >= 0; order--) {
- struct gpu_buddy_block *block, *tmp;
- struct rb_root *root;
- u64 count = 0, free;
- unsigned int tree;
-
- for_each_free_tree(tree) {
- root = &mm->free_trees[tree][order];
-
- rbtree_postorder_for_each_entry_safe(block, tmp, root, rb) {
- BUG_ON(!gpu_buddy_block_is_free(block));
- count++;
- }
- }
+ u64 count = mm->free_scoreboard[order];
+ u64 free = count * (mm->chunk_size << order);
- free = count * (mm->chunk_size << order);
if (free < SZ_1M)
pr_info("order-%2d free: %8llu KiB, blocks: %llu\n",
order, free >> 10, count);
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 841f3de5f307..7839b54d3da7 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -46,23 +46,11 @@ void drm_buddy_print(struct gpu_buddy *mm, struct drm_printer *p)
mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20, mm->clear_avail >> 20);
for (order = mm->max_order; order >= 0; order--) {
- struct gpu_buddy_block *block, *tmp;
- struct rb_root *root;
- u64 count = 0, free;
- unsigned int tree;
-
- for_each_free_tree(tree) {
- root = &mm->free_trees[tree][order];
-
- rbtree_postorder_for_each_entry_safe(block, tmp, root, rb) {
- BUG_ON(!gpu_buddy_block_is_free(block));
- count++;
- }
- }
+ u64 count = mm->free_scoreboard[order];
+ u64 free = count * (mm->chunk_size << order);
drm_printf(p, "order-%2d ", order);
- free = count * (mm->chunk_size << order);
if (free < SZ_1M)
drm_printf(p, "free: %8llu KiB", free >> 10);
else
diff --git a/include/linux/gpu_buddy.h b/include/linux/gpu_buddy.h
index 5fa917ba5450..250841ca4bcf 100644
--- a/include/linux/gpu_buddy.h
+++ b/include/linux/gpu_buddy.h
@@ -172,6 +172,13 @@ struct gpu_buddy {
* that fits in the remaining space.
*/
struct gpu_buddy_block **roots;
+ /*
+ * Per-order free block scoreboard: free_scoreboard[order] holds the
+ * number of blocks of that order currently in the free state.
+ * Incremented in mark_free(), decremented in mark_allocated() and
+ * mark_split() when a block leaves the free state.
+ */
+ u64 *free_scoreboard;
/* public: */
unsigned int n_roots;
unsigned int max_order;
--
2.43.0
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 2/2] gpu/buddy: Track per-order used blocks with a scoreboard
2026-05-04 13:52 [PATCH 0/2] gpu/buddy: Per-order free and used block scoreboards Francois Dugast
2026-05-04 13:52 ` [PATCH 1/2] gpu/buddy: Track per-order free blocks with a scoreboard Francois Dugast
@ 2026-05-04 13:52 ` Francois Dugast
2026-05-08 14:13 ` Matthew Auld
1 sibling, 1 reply; 5+ messages in thread
From: Francois Dugast @ 2026-05-04 13:52 UTC (permalink / raw)
To: dri-devel; +Cc: intel-xe, matthew.auld, Francois Dugast
Extend the scoreboard approach from the previous commit to used blocks,
so drm_buddy_print() can report per-order allocation pressure in O(1).
Unlike free blocks, an allocated block can leave the allocated state
through mark_free() (normal free and gpu_buddy_block_trim()) or be
consumed directly by gpu_block_free() during coalescing. Both sites are
guarded by gpu_buddy_block_is_allocated() and paired with the increment
in mark_allocated().
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Assisted-by: GitHub Copilot:claude-sonnet-4.6
---
drivers/gpu/buddy.c | 29 +++++++++++++++++++++++------
drivers/gpu/drm/drm_buddy.c | 8 +++++---
include/linux/gpu_buddy.h | 8 ++++++++
3 files changed, 36 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/buddy.c b/drivers/gpu/buddy.c
index d831165e87ea..ebef03613b3f 100644
--- a/drivers/gpu/buddy.c
+++ b/drivers/gpu/buddy.c
@@ -194,6 +194,7 @@ static void mark_allocated(struct gpu_buddy *mm,
block->header |= GPU_BUDDY_ALLOCATED;
mm->free_scoreboard[gpu_buddy_block_order(block)]--;
+ mm->used_scoreboard[gpu_buddy_block_order(block)]++;
rbtree_remove(mm, block);
}
@@ -203,6 +204,9 @@ static void mark_free(struct gpu_buddy *mm,
{
enum gpu_buddy_free_tree tree;
+ if (gpu_buddy_block_is_allocated(block))
+ mm->used_scoreboard[gpu_buddy_block_order(block)]--;
+
block->header &= ~GPU_BUDDY_HEADER_STATE;
block->header |= GPU_BUDDY_FREE;
@@ -281,6 +285,9 @@ static unsigned int __gpu_buddy_free(struct gpu_buddy *mm,
if (force_merge && gpu_buddy_block_is_clear(buddy))
mm->clear_avail -= gpu_buddy_block_size(mm, buddy);
+ if (gpu_buddy_block_is_allocated(block))
+ mm->used_scoreboard[gpu_buddy_block_order(block)]--;
+
gpu_block_free(mm, block);
gpu_block_free(mm, buddy);
@@ -398,6 +405,12 @@ int gpu_buddy_init(struct gpu_buddy *mm, u64 size, u64 chunk_size)
if (!mm->free_scoreboard)
return -ENOMEM;
+ mm->used_scoreboard = kcalloc(mm->max_order + 1,
+ sizeof(*mm->used_scoreboard),
+ GFP_KERNEL);
+ if (!mm->used_scoreboard)
+ goto out_free_free_scoreboard;
+
mm->free_trees = kmalloc_array(GPU_BUDDY_MAX_FREE_TREES,
sizeof(*mm->free_trees),
GFP_KERNEL);
@@ -462,6 +475,8 @@ int gpu_buddy_init(struct gpu_buddy *mm, u64 size, u64 chunk_size)
kfree(mm->free_trees[i]);
kfree(mm->free_trees);
out_free_scoreboard:
+ kfree(mm->used_scoreboard);
+out_free_free_scoreboard:
kfree(mm->free_scoreboard);
return -ENOMEM;
}
@@ -502,6 +517,7 @@ void gpu_buddy_fini(struct gpu_buddy *mm)
kfree(mm->free_trees);
kfree(mm->roots);
kfree(mm->free_scoreboard);
+ kfree(mm->used_scoreboard);
}
EXPORT_SYMBOL(gpu_buddy_fini);
@@ -1496,15 +1512,16 @@ void gpu_buddy_print(struct gpu_buddy *mm)
mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20, mm->clear_avail >> 20);
for (order = mm->max_order; order >= 0; order--) {
- u64 count = mm->free_scoreboard[order];
- u64 free = count * (mm->chunk_size << order);
+ u64 free_count = mm->free_scoreboard[order];
+ u64 used_count = mm->used_scoreboard[order];
+ u64 free = free_count * (mm->chunk_size << order);
if (free < SZ_1M)
- pr_info("order-%2d free: %8llu KiB, blocks: %llu\n",
- order, free >> 10, count);
+ pr_info("order-%2d free: %8llu KiB, free_blocks: %llu, used_blocks: %llu\n",
+ order, free >> 10, free_count, used_count);
else
- pr_info("order-%2d free: %8llu MiB, blocks: %llu\n",
- order, free >> 20, count);
+ pr_info("order-%2d free: %8llu MiB, free_blocks: %llu, used_blocks: %llu\n",
+ order, free >> 20, free_count, used_count);
}
}
EXPORT_SYMBOL(gpu_buddy_print);
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 7839b54d3da7..3a1cb06923c6 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -46,8 +46,9 @@ void drm_buddy_print(struct gpu_buddy *mm, struct drm_printer *p)
mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20, mm->clear_avail >> 20);
for (order = mm->max_order; order >= 0; order--) {
- u64 count = mm->free_scoreboard[order];
- u64 free = count * (mm->chunk_size << order);
+ u64 free_count = mm->free_scoreboard[order];
+ u64 used_count = mm->used_scoreboard[order];
+ u64 free = free_count * (mm->chunk_size << order);
drm_printf(p, "order-%2d ", order);
@@ -56,7 +57,8 @@ void drm_buddy_print(struct gpu_buddy *mm, struct drm_printer *p)
else
drm_printf(p, "free: %8llu MiB", free >> 20);
- drm_printf(p, ", blocks: %llu\n", count);
+ drm_printf(p, ", free_blocks: %llu, used_blocks: %llu\n",
+ free_count, used_count);
}
}
EXPORT_SYMBOL(drm_buddy_print);
diff --git a/include/linux/gpu_buddy.h b/include/linux/gpu_buddy.h
index 250841ca4bcf..b1cad7833dc1 100644
--- a/include/linux/gpu_buddy.h
+++ b/include/linux/gpu_buddy.h
@@ -179,6 +179,14 @@ struct gpu_buddy {
* mark_split() when a block leaves the free state.
*/
u64 *free_scoreboard;
+ /*
+ * Per-order used block scoreboard: used_scoreboard[order] holds the
+ * number of blocks of that order currently in the allocated state.
+ * Incremented in mark_allocated(); decremented in mark_free() and
+ * __gpu_buddy_free(), in both cases only when the block being
+ * released is still in the allocated state.
+ */
+ u64 *used_scoreboard;
/* public: */
unsigned int n_roots;
unsigned int max_order;
--
2.43.0
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH 1/2] gpu/buddy: Track per-order free blocks with a scoreboard
2026-05-04 13:52 ` [PATCH 1/2] gpu/buddy: Track per-order free blocks with a scoreboard Francois Dugast
@ 2026-05-08 14:09 ` Matthew Auld
0 siblings, 0 replies; 5+ messages in thread
From: Matthew Auld @ 2026-05-08 14:09 UTC (permalink / raw)
To: Francois Dugast, dri-devel, Paneer Selvam, Arunpravin; +Cc: intel-xe
On 04/05/2026 14:52, Francois Dugast wrote:
> Reporting per-order free block counts in drm_buddy_print() currently
> requires walking all rbtrees, which is O(n) over the total number of
> free blocks and holds the allocator lock for the duration. This becomes
> expensive on large VRAM heaps with many small free fragments.
>
> Maintain a free_scoreboard[] array indexed by order instead, so that
> the count for any order is always available in O(1). The scoreboard is
> kept accurate by hooking into the four places where a block's free state
> changes: mark_free(), mark_allocated(), mark_split(), and the two sites
> in __gpu_buddy_free() and __force_merge() that call rbtree_remove()
> directly on free blocks without going through mark_*().
>
> The print functions are simplified as a result: the rbtree traversal
> is replaced by a direct array lookup.
>
> Signed-off-by: Francois Dugast <francois.dugast@intel.com>
> Assisted-by: GitHub Copilot:claude-sonnet-4.6
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
> ---
> drivers/gpu/buddy.c | 35 ++++++++++++++++++++---------------
> drivers/gpu/drm/drm_buddy.c | 16 ++--------------
> include/linux/gpu_buddy.h | 7 +++++++
> 3 files changed, 29 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/gpu/buddy.c b/drivers/gpu/buddy.c
> index 52686672e99f..d831165e87ea 100644
> --- a/drivers/gpu/buddy.c
> +++ b/drivers/gpu/buddy.c
> @@ -193,6 +193,8 @@ static void mark_allocated(struct gpu_buddy *mm,
> block->header &= ~GPU_BUDDY_HEADER_STATE;
> block->header |= GPU_BUDDY_ALLOCATED;
>
> + mm->free_scoreboard[gpu_buddy_block_order(block)]--;
> +
> rbtree_remove(mm, block);
> }
>
> @@ -204,6 +206,8 @@ static void mark_free(struct gpu_buddy *mm,
> block->header &= ~GPU_BUDDY_HEADER_STATE;
> block->header |= GPU_BUDDY_FREE;
>
> + mm->free_scoreboard[gpu_buddy_block_order(block)]++;
> +
> tree = get_block_tree(block);
> rbtree_insert(mm, block, tree);
> }
> @@ -214,6 +218,8 @@ static void mark_split(struct gpu_buddy *mm,
> block->header &= ~GPU_BUDDY_HEADER_STATE;
> block->header |= GPU_BUDDY_SPLIT;
>
> + mm->free_scoreboard[gpu_buddy_block_order(block)]--;
> +
> rbtree_remove(mm, block);
> }
>
> @@ -271,6 +277,7 @@ static unsigned int __gpu_buddy_free(struct gpu_buddy *mm,
> }
>
> rbtree_remove(mm, buddy);
> + mm->free_scoreboard[gpu_buddy_block_order(buddy)]--;
> if (force_merge && gpu_buddy_block_is_clear(buddy))
> mm->clear_avail -= gpu_buddy_block_size(mm, buddy);
>
> @@ -335,6 +342,7 @@ static int __force_merge(struct gpu_buddy *mm,
> iter = rb_prev(iter);
>
> rbtree_remove(mm, block);
> + mm->free_scoreboard[gpu_buddy_block_order(block)]--;
> if (gpu_buddy_block_is_clear(block))
> mm->clear_avail -= gpu_buddy_block_size(mm, block);
>
> @@ -384,11 +392,17 @@ int gpu_buddy_init(struct gpu_buddy *mm, u64 size, u64 chunk_size)
>
> BUG_ON(mm->max_order > GPU_BUDDY_MAX_ORDER);
>
> + mm->free_scoreboard = kcalloc(mm->max_order + 1,
> + sizeof(*mm->free_scoreboard),
> + GFP_KERNEL);
> + if (!mm->free_scoreboard)
> + return -ENOMEM;
> +
> mm->free_trees = kmalloc_array(GPU_BUDDY_MAX_FREE_TREES,
> sizeof(*mm->free_trees),
> GFP_KERNEL);
> if (!mm->free_trees)
> - return -ENOMEM;
> + goto out_free_scoreboard;
>
> for_each_free_tree(i) {
> mm->free_trees[i] = kmalloc_array(mm->max_order + 1,
> @@ -447,6 +461,8 @@ int gpu_buddy_init(struct gpu_buddy *mm, u64 size, u64 chunk_size)
> while (i--)
> kfree(mm->free_trees[i]);
> kfree(mm->free_trees);
> +out_free_scoreboard:
> + kfree(mm->free_scoreboard);
> return -ENOMEM;
> }
> EXPORT_SYMBOL(gpu_buddy_init);
> @@ -485,6 +501,7 @@ void gpu_buddy_fini(struct gpu_buddy *mm)
> kfree(mm->free_trees[i]);
> kfree(mm->free_trees);
> kfree(mm->roots);
> + kfree(mm->free_scoreboard);
> }
> EXPORT_SYMBOL(gpu_buddy_fini);
>
> @@ -1479,21 +1496,9 @@ void gpu_buddy_print(struct gpu_buddy *mm)
> mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20, mm->clear_avail >> 20);
>
> for (order = mm->max_order; order >= 0; order--) {
> - struct gpu_buddy_block *block, *tmp;
> - struct rb_root *root;
> - u64 count = 0, free;
> - unsigned int tree;
> -
> - for_each_free_tree(tree) {
> - root = &mm->free_trees[tree][order];
> -
> - rbtree_postorder_for_each_entry_safe(block, tmp, root, rb) {
> - BUG_ON(!gpu_buddy_block_is_free(block));
> - count++;
> - }
> - }
> + u64 count = mm->free_scoreboard[order];
> + u64 free = count * (mm->chunk_size << order);
>
> - free = count * (mm->chunk_size << order);
> if (free < SZ_1M)
> pr_info("order-%2d free: %8llu KiB, blocks: %llu\n",
> order, free >> 10, count);
> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
> index 841f3de5f307..7839b54d3da7 100644
> --- a/drivers/gpu/drm/drm_buddy.c
> +++ b/drivers/gpu/drm/drm_buddy.c
> @@ -46,23 +46,11 @@ void drm_buddy_print(struct gpu_buddy *mm, struct drm_printer *p)
> mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20, mm->clear_avail >> 20);
>
> for (order = mm->max_order; order >= 0; order--) {
> - struct gpu_buddy_block *block, *tmp;
> - struct rb_root *root;
> - u64 count = 0, free;
> - unsigned int tree;
> -
> - for_each_free_tree(tree) {
> - root = &mm->free_trees[tree][order];
> -
> - rbtree_postorder_for_each_entry_safe(block, tmp, root, rb) {
> - BUG_ON(!gpu_buddy_block_is_free(block));
> - count++;
> - }
> - }
> + u64 count = mm->free_scoreboard[order];
> + u64 free = count * (mm->chunk_size << order);
>
> drm_printf(p, "order-%2d ", order);
>
> - free = count * (mm->chunk_size << order);
> if (free < SZ_1M)
> drm_printf(p, "free: %8llu KiB", free >> 10);
> else
> diff --git a/include/linux/gpu_buddy.h b/include/linux/gpu_buddy.h
> index 5fa917ba5450..250841ca4bcf 100644
> --- a/include/linux/gpu_buddy.h
> +++ b/include/linux/gpu_buddy.h
> @@ -172,6 +172,13 @@ struct gpu_buddy {
> * that fits in the remaining space.
> */
> struct gpu_buddy_block **roots;
> + /*
> + * Per-order free block scoreboard: free_scoreboard[order] holds the
> + * number of blocks of that order currently in the free state.
> + * Incremented in mark_free(), decremented in mark_allocated() and
> + * mark_split() when a block leaves the free state.
> + */
> + u64 *free_scoreboard;
> /* public: */
> unsigned int n_roots;
> unsigned int max_order;
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH 2/2] gpu/buddy: Track per-order used blocks with a scoreboard
2026-05-04 13:52 ` [PATCH 2/2] gpu/buddy: Track per-order used " Francois Dugast
@ 2026-05-08 14:13 ` Matthew Auld
0 siblings, 0 replies; 5+ messages in thread
From: Matthew Auld @ 2026-05-08 14:13 UTC (permalink / raw)
To: Francois Dugast, dri-devel, Paneer Selvam, Arunpravin; +Cc: intel-xe
On 04/05/2026 14:52, Francois Dugast wrote:
> Extend the scoreboard approach from the previous commit to used blocks,
> so drm_buddy_print() can report per-order allocation pressure in O(1).
>
> Unlike free blocks, an allocated block can leave the allocated state
> through mark_free() (normal free and gpu_buddy_block_trim()) or be
> consumed directly by gpu_block_free() during coalescing. Both sites are
> guarded by gpu_buddy_block_is_allocated() and paired with the increment
> in mark_allocated().
>
> Signed-off-by: Francois Dugast <francois.dugast@intel.com>
> Assisted-by: GitHub Copilot:claude-sonnet-4.6
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Note that Sashiko found an existing issue here:
https://sashiko.dev/#/patchset/20260504135343.1797869-1-francois.dugast%40intel.com
I guess it would be good to fix that first, in this series?
> ---
> drivers/gpu/buddy.c | 29 +++++++++++++++++++++++------
> drivers/gpu/drm/drm_buddy.c | 8 +++++---
> include/linux/gpu_buddy.h | 8 ++++++++
> 3 files changed, 36 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/buddy.c b/drivers/gpu/buddy.c
> index d831165e87ea..ebef03613b3f 100644
> --- a/drivers/gpu/buddy.c
> +++ b/drivers/gpu/buddy.c
> @@ -194,6 +194,7 @@ static void mark_allocated(struct gpu_buddy *mm,
> block->header |= GPU_BUDDY_ALLOCATED;
>
> mm->free_scoreboard[gpu_buddy_block_order(block)]--;
> + mm->used_scoreboard[gpu_buddy_block_order(block)]++;
>
> rbtree_remove(mm, block);
> }
> @@ -203,6 +204,9 @@ static void mark_free(struct gpu_buddy *mm,
> {
> enum gpu_buddy_free_tree tree;
>
> + if (gpu_buddy_block_is_allocated(block))
> + mm->used_scoreboard[gpu_buddy_block_order(block)]--;
> +
> block->header &= ~GPU_BUDDY_HEADER_STATE;
> block->header |= GPU_BUDDY_FREE;
>
> @@ -281,6 +285,9 @@ static unsigned int __gpu_buddy_free(struct gpu_buddy *mm,
> if (force_merge && gpu_buddy_block_is_clear(buddy))
> mm->clear_avail -= gpu_buddy_block_size(mm, buddy);
>
> + if (gpu_buddy_block_is_allocated(block))
> + mm->used_scoreboard[gpu_buddy_block_order(block)]--;
> +
> gpu_block_free(mm, block);
> gpu_block_free(mm, buddy);
>
> @@ -398,6 +405,12 @@ int gpu_buddy_init(struct gpu_buddy *mm, u64 size, u64 chunk_size)
> if (!mm->free_scoreboard)
> return -ENOMEM;
>
> + mm->used_scoreboard = kcalloc(mm->max_order + 1,
> + sizeof(*mm->used_scoreboard),
> + GFP_KERNEL);
> + if (!mm->used_scoreboard)
> + goto out_free_free_scoreboard;
> +
> mm->free_trees = kmalloc_array(GPU_BUDDY_MAX_FREE_TREES,
> sizeof(*mm->free_trees),
> GFP_KERNEL);
> @@ -462,6 +475,8 @@ int gpu_buddy_init(struct gpu_buddy *mm, u64 size, u64 chunk_size)
> kfree(mm->free_trees[i]);
> kfree(mm->free_trees);
> out_free_scoreboard:
> + kfree(mm->used_scoreboard);
> +out_free_free_scoreboard:
> kfree(mm->free_scoreboard);
> return -ENOMEM;
> }
> @@ -502,6 +517,7 @@ void gpu_buddy_fini(struct gpu_buddy *mm)
> kfree(mm->free_trees);
> kfree(mm->roots);
> kfree(mm->free_scoreboard);
> + kfree(mm->used_scoreboard);
> }
> EXPORT_SYMBOL(gpu_buddy_fini);
>
> @@ -1496,15 +1512,16 @@ void gpu_buddy_print(struct gpu_buddy *mm)
> mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20, mm->clear_avail >> 20);
>
> for (order = mm->max_order; order >= 0; order--) {
> - u64 count = mm->free_scoreboard[order];
> - u64 free = count * (mm->chunk_size << order);
> + u64 free_count = mm->free_scoreboard[order];
> + u64 used_count = mm->used_scoreboard[order];
> + u64 free = free_count * (mm->chunk_size << order);
>
> if (free < SZ_1M)
> - pr_info("order-%2d free: %8llu KiB, blocks: %llu\n",
> - order, free >> 10, count);
> + pr_info("order-%2d free: %8llu KiB, free_blocks: %llu, used_blocks: %llu\n",
> + order, free >> 10, free_count, used_count);
> else
> - pr_info("order-%2d free: %8llu MiB, blocks: %llu\n",
> - order, free >> 20, count);
> + pr_info("order-%2d free: %8llu MiB, free_blocks: %llu, used_blocks: %llu\n",
> + order, free >> 20, free_count, used_count);
> }
> }
> EXPORT_SYMBOL(gpu_buddy_print);
> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
> index 7839b54d3da7..3a1cb06923c6 100644
> --- a/drivers/gpu/drm/drm_buddy.c
> +++ b/drivers/gpu/drm/drm_buddy.c
> @@ -46,8 +46,9 @@ void drm_buddy_print(struct gpu_buddy *mm, struct drm_printer *p)
> mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20, mm->clear_avail >> 20);
>
> for (order = mm->max_order; order >= 0; order--) {
> - u64 count = mm->free_scoreboard[order];
> - u64 free = count * (mm->chunk_size << order);
> + u64 free_count = mm->free_scoreboard[order];
> + u64 used_count = mm->used_scoreboard[order];
> + u64 free = free_count * (mm->chunk_size << order);
>
> drm_printf(p, "order-%2d ", order);
>
> @@ -56,7 +57,8 @@ void drm_buddy_print(struct gpu_buddy *mm, struct drm_printer *p)
> else
> drm_printf(p, "free: %8llu MiB", free >> 20);
>
> - drm_printf(p, ", blocks: %llu\n", count);
> + drm_printf(p, ", free_blocks: %llu, used_blocks: %llu\n",
> + free_count, used_count);
> }
> }
> EXPORT_SYMBOL(drm_buddy_print);
> diff --git a/include/linux/gpu_buddy.h b/include/linux/gpu_buddy.h
> index 250841ca4bcf..b1cad7833dc1 100644
> --- a/include/linux/gpu_buddy.h
> +++ b/include/linux/gpu_buddy.h
> @@ -179,6 +179,14 @@ struct gpu_buddy {
> * mark_split() when a block leaves the free state.
> */
> u64 *free_scoreboard;
> + /*
> + * Per-order used block scoreboard: used_scoreboard[order] holds the
> + * number of blocks of that order currently in the allocated state.
> + * Incremented in mark_allocated(), decremented in
> + * gpu_buddy_free_block() which is the sole entry point for freeing
> + * allocated blocks.
> + */
> + u64 *used_scoreboard;
> /* public: */
> unsigned int n_roots;
> unsigned int max_order;
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2026-05-08 14:13 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-05-04 13:52 [PATCH 0/2] gpu/buddy: Per-order free and used block scoreboards Francois Dugast
2026-05-04 13:52 ` [PATCH 1/2] gpu/buddy: Track per-order free blocks with a scoreboard Francois Dugast
2026-05-08 14:09 ` Matthew Auld
2026-05-04 13:52 ` [PATCH 2/2] gpu/buddy: Track per-order used " Francois Dugast
2026-05-08 14:13 ` Matthew Auld
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox