* [Intel-gfx] [PATCH v2 1/3] drm/buddy: Improve contiguous memory allocation
@ 2023-09-09 16:09 Arunpravin Paneer Selvam
2023-09-09 16:09 ` [Intel-gfx] [PATCH v2 2/3] drm/amdgpu: Move the size computations to drm buddy Arunpravin Paneer Selvam
` (3 more replies)
0 siblings, 4 replies; 6+ messages in thread
From: Arunpravin Paneer Selvam @ 2023-09-09 16:09 UTC (permalink / raw)
To: dri-devel, amd-gfx, intel-gfx
Cc: alexander.deucher, Arunpravin Paneer Selvam, christian.koenig,
matthew.auld
Problem statement: The current approach of using roundup_pow_of_two()
to allocate contiguous addresses triggers -ENOSPC in some cases
even though enough free space is available. To help with that,
we introduce a try-harder mechanism.
In case of -ENOSPC, the new try-harder mechanism rounds down the
original size to a power of 2 and iterates over the free-list blocks
of that rounded-down size, traversing RHS and then LHS to allocate
the required size.
As part of implementing the above method, we moved the
contiguous/alignment size computation and the trim function call
into the drm buddy file.
v2: Modify the alloc_range() function to return total allocated size
on -ENOSPC err and traverse RHS/LHS to allocate the required
size (Matthew).
Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
---
drivers/gpu/drm/drm_buddy.c | 138 ++++++++++++++++++++++++++++++++----
include/drm/drm_buddy.h | 6 +-
2 files changed, 127 insertions(+), 17 deletions(-)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 7098f125b54a..e909eed9cf60 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -480,10 +480,12 @@ alloc_from_freelist(struct drm_buddy *mm,
static int __alloc_range(struct drm_buddy *mm,
struct list_head *dfs,
u64 start, u64 size,
- struct list_head *blocks)
+ struct list_head *blocks,
+ u64 *total_allocated_on_err)
{
struct drm_buddy_block *block;
struct drm_buddy_block *buddy;
+ u64 total_allocated = 0;
LIST_HEAD(allocated);
u64 end;
int err;
@@ -520,6 +522,7 @@ static int __alloc_range(struct drm_buddy *mm,
}
mark_allocated(block);
+ total_allocated += drm_buddy_block_size(mm, block);
mm->avail -= drm_buddy_block_size(mm, block);
list_add_tail(&block->link, &allocated);
continue;
@@ -551,13 +554,20 @@ static int __alloc_range(struct drm_buddy *mm,
__drm_buddy_free(mm, block);
err_free:
- drm_buddy_free_list(mm, &allocated);
+ if (err == -ENOSPC && total_allocated_on_err) {
+ list_splice_tail(&allocated, blocks);
+ *total_allocated_on_err = total_allocated;
+ } else {
+ drm_buddy_free_list(mm, &allocated);
+ }
+
return err;
}
static int __drm_buddy_alloc_range(struct drm_buddy *mm,
u64 start,
u64 size,
+ u64 *total_allocated_on_err,
struct list_head *blocks)
{
LIST_HEAD(dfs);
@@ -566,7 +576,62 @@ static int __drm_buddy_alloc_range(struct drm_buddy *mm,
for (i = 0; i < mm->n_roots; ++i)
list_add_tail(&mm->roots[i]->tmp_link, &dfs);
- return __alloc_range(mm, &dfs, start, size, blocks);
+ return __alloc_range(mm, &dfs, start, size,
+ blocks, total_allocated_on_err);
+}
+
+static int __alloc_contig_try_harder(struct drm_buddy *mm,
+ u64 size,
+ u64 min_block_size,
+ struct list_head *blocks)
+{
+ u64 rhs_offset, lhs_offset, lhs_size, filled;
+ struct drm_buddy_block *block;
+ struct list_head *list;
+ LIST_HEAD(blocks_lhs);
+ unsigned long pages;
+ unsigned int order;
+ u64 modify_size;
+ int err;
+
+ modify_size = rounddown_pow_of_two(size);
+ pages = modify_size >> ilog2(mm->chunk_size);
+ order = fls(pages) - 1;
+ if (order == 0)
+ return -ENOSPC;
+
+ list = &mm->free_list[order];
+ if (list_empty(list))
+ return -ENOSPC;
+
+ list_for_each_entry_reverse(block, list, link) {
+ /* Allocate blocks traversing RHS */
+ rhs_offset = drm_buddy_block_offset(block);
+ err = __drm_buddy_alloc_range(mm, rhs_offset, size,
+ &filled, blocks);
+ if (!err || err != -ENOSPC)
+ return err;
+
+ lhs_size = max((size - filled), min_block_size);
+ if (!IS_ALIGNED(lhs_size, min_block_size))
+ lhs_size = round_up(lhs_size, min_block_size);
+
+ /* Allocate blocks traversing LHS */
+ lhs_offset = drm_buddy_block_offset(block) - lhs_size;
+ err = __drm_buddy_alloc_range(mm, lhs_offset, lhs_size,
+ NULL, &blocks_lhs);
+ if (!err) {
+ list_splice(&blocks_lhs, blocks);
+ return 0;
+ } else if (err != -ENOSPC) {
+ drm_buddy_free_list(mm, blocks);
+ return err;
+ }
+ /* Free blocks for the next iteration */
+ drm_buddy_free_list(mm, blocks);
+ }
+
+ return -ENOSPC;
}
/**
@@ -626,7 +691,7 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
new_start = drm_buddy_block_offset(block);
list_add(&block->tmp_link, &dfs);
- err = __alloc_range(mm, &dfs, new_start, new_size, blocks);
+ err = __alloc_range(mm, &dfs, new_start, new_size, blocks, NULL);
if (err) {
mark_allocated(block);
mm->avail -= drm_buddy_block_size(mm, block);
@@ -645,7 +710,7 @@ EXPORT_SYMBOL(drm_buddy_block_trim);
* @start: start of the allowed range for this block
* @end: end of the allowed range for this block
* @size: size of the allocation
- * @min_page_size: alignment of the allocation
+ * @min_block_size: alignment of the allocation
* @blocks: output list head to add allocated blocks
* @flags: DRM_BUDDY_*_ALLOCATION flags
*
@@ -660,23 +725,24 @@ EXPORT_SYMBOL(drm_buddy_block_trim);
*/
int drm_buddy_alloc_blocks(struct drm_buddy *mm,
u64 start, u64 end, u64 size,
- u64 min_page_size,
+ u64 min_block_size,
struct list_head *blocks,
unsigned long flags)
{
struct drm_buddy_block *block = NULL;
+ u64 original_size, original_min_size;
unsigned int min_order, order;
- unsigned long pages;
LIST_HEAD(allocated);
+ unsigned long pages;
int err;
if (size < mm->chunk_size)
return -EINVAL;
- if (min_page_size < mm->chunk_size)
+ if (min_block_size < mm->chunk_size)
return -EINVAL;
- if (!is_power_of_2(min_page_size))
+ if (!is_power_of_2(min_block_size))
return -EINVAL;
if (!IS_ALIGNED(start | end | size, mm->chunk_size))
@@ -690,14 +756,23 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
/* Actual range allocation */
if (start + size == end)
- return __drm_buddy_alloc_range(mm, start, size, blocks);
-
- if (!IS_ALIGNED(size, min_page_size))
- return -EINVAL;
+ return __drm_buddy_alloc_range(mm, start, size, NULL, blocks);
+
+ original_size = size;
+ original_min_size = min_block_size;
+
+ /* Roundup the size to power of 2 */
+ if (flags & DRM_BUDDY_CONTIGUOUS_ALLOCATION) {
+ size = roundup_pow_of_two(size);
+ min_block_size = size;
+ /* Align size value to min_block_size */
+ } else if (!IS_ALIGNED(size, min_block_size)) {
+ size = round_up(size, min_block_size);
+ }
pages = size >> ilog2(mm->chunk_size);
order = fls(pages) - 1;
- min_order = ilog2(min_page_size) - ilog2(mm->chunk_size);
+ min_order = ilog2(min_block_size) - ilog2(mm->chunk_size);
do {
order = min(order, (unsigned int)fls(pages) - 1);
@@ -716,6 +791,16 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
break;
if (order-- == min_order) {
+ if (flags & DRM_BUDDY_CONTIGUOUS_ALLOCATION &&
+ !(flags & DRM_BUDDY_RANGE_ALLOCATION))
+ /*
+ * Try contiguous block allocation through
+ * try harder method
+ */
+ return __alloc_contig_try_harder(mm,
+ original_size,
+ original_min_size,
+ blocks);
err = -ENOSPC;
goto err_free;
}
@@ -732,6 +817,31 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
break;
} while (1);
+ /* Trim the allocated block to the required size */
+ if (original_size != size) {
+ struct list_head *trim_list;
+ LIST_HEAD(temp);
+ u64 trim_size;
+
+ trim_list = &allocated;
+ trim_size = original_size;
+
+ if (!list_is_singular(&allocated)) {
+ block = list_last_entry(&allocated, typeof(*block), link);
+ list_move(&block->link, &temp);
+ trim_list = &temp;
+ trim_size = drm_buddy_block_size(mm, block) -
+ (size - original_size);
+ }
+
+ drm_buddy_block_trim(mm,
+ trim_size,
+ trim_list);
+
+ if (!list_empty(&temp))
+ list_splice_tail(trim_list, &allocated);
+ }
+
list_splice_tail(&allocated, blocks);
return 0;
diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
index 572077ff8ae7..a5b39fc01003 100644
--- a/include/drm/drm_buddy.h
+++ b/include/drm/drm_buddy.h
@@ -22,8 +22,9 @@
start__ >= max__ || size__ > max__ - start__; \
})
-#define DRM_BUDDY_RANGE_ALLOCATION (1 << 0)
-#define DRM_BUDDY_TOPDOWN_ALLOCATION (1 << 1)
+#define DRM_BUDDY_RANGE_ALLOCATION BIT(0)
+#define DRM_BUDDY_TOPDOWN_ALLOCATION BIT(1)
+#define DRM_BUDDY_CONTIGUOUS_ALLOCATION BIT(2)
struct drm_buddy_block {
#define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12)
@@ -155,5 +156,4 @@ void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p);
void drm_buddy_block_print(struct drm_buddy *mm,
struct drm_buddy_block *block,
struct drm_printer *p);
-
#endif
--
2.25.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [Intel-gfx] [PATCH v2 2/3] drm/amdgpu: Move the size computations to drm buddy
2023-09-09 16:09 [Intel-gfx] [PATCH v2 1/3] drm/buddy: Improve contiguous memory allocation Arunpravin Paneer Selvam
@ 2023-09-09 16:09 ` Arunpravin Paneer Selvam
2023-09-09 16:09 ` [Intel-gfx] [PATCH v2 3/3] drm/i915: " Arunpravin Paneer Selvam
` (2 subsequent siblings)
3 siblings, 0 replies; 6+ messages in thread
From: Arunpravin Paneer Selvam @ 2023-09-09 16:09 UTC (permalink / raw)
To: dri-devel, amd-gfx, intel-gfx
Cc: alexander.deucher, Arunpravin Paneer Selvam, christian.koenig,
matthew.auld
- Move the roundup_pow_of_two() and IS_ALIGNED() computations to
the drm buddy file to support the new try-harder mechanism for
contiguous allocation.
- Move trim function call to drm_buddy_alloc_blocks() function.
Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 58 ++------------------
1 file changed, 4 insertions(+), 54 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index c7085a747b03..18f58efc9dc7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -424,9 +424,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
const struct ttm_place *place,
struct ttm_resource **res)
{
- u64 vis_usage = 0, max_bytes, cur_size, min_block_size;
struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
struct amdgpu_device *adev = to_amdgpu_device(mgr);
+ u64 vis_usage = 0, max_bytes, min_block_size;
struct amdgpu_vram_mgr_resource *vres;
u64 size, remaining_size, lpfn, fpfn;
struct drm_buddy *mm = &mgr->mm;
@@ -474,6 +474,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
if (place->flags & TTM_PL_FLAG_TOPDOWN)
vres->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
+ if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
+ vres->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION;
+
if (fpfn || lpfn != mgr->mm.size)
/* Allocate blocks in desired range */
vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
@@ -496,25 +499,6 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
!(size & (((u64)pages_per_block << PAGE_SHIFT) - 1)))
min_block_size = (u64)pages_per_block << PAGE_SHIFT;
- cur_size = size;
-
- if (fpfn + size != (u64)place->lpfn << PAGE_SHIFT) {
- /*
- * Except for actual range allocation, modify the size and
- * min_block_size conforming to continuous flag enablement
- */
- if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
- size = roundup_pow_of_two(size);
- min_block_size = size;
- /*
- * Modify the size value if size is not
- * aligned with min_block_size
- */
- } else if (!IS_ALIGNED(size, min_block_size)) {
- size = round_up(size, min_block_size);
- }
- }
-
r = drm_buddy_alloc_blocks(mm, fpfn,
lpfn,
size,
@@ -531,40 +515,6 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
}
mutex_unlock(&mgr->lock);
- if (cur_size != size) {
- struct drm_buddy_block *block;
- struct list_head *trim_list;
- u64 original_size;
- LIST_HEAD(temp);
-
- trim_list = &vres->blocks;
- original_size = (u64)vres->base.size;
-
- /*
- * If size value is rounded up to min_block_size, trim the last
- * block to the required size
- */
- if (!list_is_singular(&vres->blocks)) {
- block = list_last_entry(&vres->blocks, typeof(*block), link);
- list_move_tail(&block->link, &temp);
- trim_list = &temp;
- /*
- * Compute the original_size value by subtracting the
- * last block size with (aligned size - original size)
- */
- original_size = amdgpu_vram_mgr_block_size(block) - (size - cur_size);
- }
-
- mutex_lock(&mgr->lock);
- drm_buddy_block_trim(mm,
- original_size,
- trim_list);
- mutex_unlock(&mgr->lock);
-
- if (!list_empty(&temp))
- list_splice_tail(trim_list, &vres->blocks);
- }
-
vres->base.start = 0;
list_for_each_entry(block, &vres->blocks, link) {
unsigned long start;
--
2.25.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [Intel-gfx] [PATCH v2 3/3] drm/i915: Move the size computations to drm buddy
2023-09-09 16:09 [Intel-gfx] [PATCH v2 1/3] drm/buddy: Improve contiguous memory allocation Arunpravin Paneer Selvam
2023-09-09 16:09 ` [Intel-gfx] [PATCH v2 2/3] drm/amdgpu: Move the size computations to drm buddy Arunpravin Paneer Selvam
@ 2023-09-09 16:09 ` Arunpravin Paneer Selvam
2023-09-09 16:37 ` [Intel-gfx] ✗ Fi.CI.SPARSE: warning for series starting with [v2,1/3] drm/buddy: Improve contiguous memory allocation Patchwork
2023-09-11 10:46 ` [Intel-gfx] [PATCH v2 1/3] " Matthew Auld
3 siblings, 0 replies; 6+ messages in thread
From: Arunpravin Paneer Selvam @ 2023-09-09 16:09 UTC (permalink / raw)
To: dri-devel, amd-gfx, intel-gfx
Cc: alexander.deucher, Arunpravin Paneer Selvam, christian.koenig,
matthew.auld
- Move roundup_pow_of_two() to the drm buddy file to support
the new try-harder mechanism for contiguous allocation.
- Move trim function call to drm_buddy_alloc_blocks() function.
Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
---
drivers/gpu/drm/i915/i915_ttm_buddy_manager.c | 23 +++----------------
1 file changed, 3 insertions(+), 20 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
index a1bc804cfa15..0d735d5c2b35 100644
--- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
+++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
@@ -59,6 +59,9 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
if (place->flags & TTM_PL_FLAG_TOPDOWN)
bman_res->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
+ if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
+ bman_res->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION;
+
if (place->fpfn || lpfn != man->size)
bman_res->flags |= DRM_BUDDY_RANGE_ALLOCATION;
@@ -72,18 +75,6 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
GEM_BUG_ON(min_page_size < mm->chunk_size);
GEM_BUG_ON(!IS_ALIGNED(size, min_page_size));
- if (place->fpfn + PFN_UP(bman_res->base.size) != place->lpfn &&
- place->flags & TTM_PL_FLAG_CONTIGUOUS) {
- unsigned long pages;
-
- size = roundup_pow_of_two(size);
- min_page_size = size;
-
- pages = size >> ilog2(mm->chunk_size);
- if (pages > lpfn)
- lpfn = pages;
- }
-
if (size > lpfn << PAGE_SHIFT) {
err = -E2BIG;
goto err_free_res;
@@ -107,14 +98,6 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
if (unlikely(err))
goto err_free_blocks;
- if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
- u64 original_size = (u64)bman_res->base.size;
-
- drm_buddy_block_trim(mm,
- original_size,
- &bman_res->blocks);
- }
-
if (lpfn <= bman->visible_size) {
bman_res->used_visible_size = PFN_UP(bman_res->base.size);
} else {
--
2.25.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [Intel-gfx] ✗ Fi.CI.SPARSE: warning for series starting with [v2,1/3] drm/buddy: Improve contiguous memory allocation
2023-09-09 16:09 [Intel-gfx] [PATCH v2 1/3] drm/buddy: Improve contiguous memory allocation Arunpravin Paneer Selvam
2023-09-09 16:09 ` [Intel-gfx] [PATCH v2 2/3] drm/amdgpu: Move the size computations to drm buddy Arunpravin Paneer Selvam
2023-09-09 16:09 ` [Intel-gfx] [PATCH v2 3/3] drm/i915: " Arunpravin Paneer Selvam
@ 2023-09-09 16:37 ` Patchwork
2023-09-11 10:46 ` [Intel-gfx] [PATCH v2 1/3] " Matthew Auld
3 siblings, 0 replies; 6+ messages in thread
From: Patchwork @ 2023-09-09 16:37 UTC (permalink / raw)
To: Arunpravin Paneer Selvam; +Cc: intel-gfx
== Series Details ==
Series: series starting with [v2,1/3] drm/buddy: Improve contiguous memory allocation
URL : https://patchwork.freedesktop.org/series/123501/
State : warning
== Summary ==
Error: dim sparse failed
Sparse version: v0.6.2
Fast mode used, each commit won't be checked separately.
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [Intel-gfx] [PATCH v2 1/3] drm/buddy: Improve contiguous memory allocation
2023-09-09 16:09 [Intel-gfx] [PATCH v2 1/3] drm/buddy: Improve contiguous memory allocation Arunpravin Paneer Selvam
` (2 preceding siblings ...)
2023-09-09 16:37 ` [Intel-gfx] ✗ Fi.CI.SPARSE: warning for series starting with [v2,1/3] drm/buddy: Improve contiguous memory allocation Patchwork
@ 2023-09-11 10:46 ` Matthew Auld
2023-09-13 14:56 ` Arunpravin Paneer Selvam
3 siblings, 1 reply; 6+ messages in thread
From: Matthew Auld @ 2023-09-11 10:46 UTC (permalink / raw)
To: Arunpravin Paneer Selvam, dri-devel, amd-gfx, intel-gfx
Cc: alexander.deucher, christian.koenig
On 09/09/2023 17:09, Arunpravin Paneer Selvam wrote:
> Problem statement: The current approach of using roundup_pow_of_two()
> to allocate contiguous addresses triggers -ENOSPC in some cases
> even though enough free space is available. To help with that,
> we introduce a try-harder mechanism.
>
> In case of -ENOSPC, the new try-harder mechanism rounds down the
> original size to a power of 2 and iterates over the free-list blocks
> of that rounded-down size, traversing RHS and then LHS to allocate
> the required size.
>
> As part of implementing the above method, we moved the
> contiguous/alignment size computation and the trim function call
> into the drm buddy file.
>
> v2: Modify the alloc_range() function to return total allocated size
> on -ENOSPC err and traverse RHS/LHS to allocate the required
> size (Matthew).
>
> Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
> ---
> drivers/gpu/drm/drm_buddy.c | 138 ++++++++++++++++++++++++++++++++----
> include/drm/drm_buddy.h | 6 +-
> 2 files changed, 127 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
> index 7098f125b54a..e909eed9cf60 100644
> --- a/drivers/gpu/drm/drm_buddy.c
> +++ b/drivers/gpu/drm/drm_buddy.c
> @@ -480,10 +480,12 @@ alloc_from_freelist(struct drm_buddy *mm,
> static int __alloc_range(struct drm_buddy *mm,
> struct list_head *dfs,
> u64 start, u64 size,
> - struct list_head *blocks)
> + struct list_head *blocks,
> + u64 *total_allocated_on_err)
> {
> struct drm_buddy_block *block;
> struct drm_buddy_block *buddy;
> + u64 total_allocated = 0;
> LIST_HEAD(allocated);
> u64 end;
> int err;
> @@ -520,6 +522,7 @@ static int __alloc_range(struct drm_buddy *mm,
> }
>
> mark_allocated(block);
> + total_allocated += drm_buddy_block_size(mm, block);
> mm->avail -= drm_buddy_block_size(mm, block);
> list_add_tail(&block->link, &allocated);
> continue;
> @@ -551,13 +554,20 @@ static int __alloc_range(struct drm_buddy *mm,
> __drm_buddy_free(mm, block);
>
> err_free:
> - drm_buddy_free_list(mm, &allocated);
> + if (err == -ENOSPC && total_allocated_on_err) {
> + list_splice_tail(&allocated, blocks);
> + *total_allocated_on_err = total_allocated;
> + } else {
> + drm_buddy_free_list(mm, &allocated);
> + }
> +
> return err;
> }
>
> static int __drm_buddy_alloc_range(struct drm_buddy *mm,
> u64 start,
> u64 size,
> + u64 *total_allocated_on_err,
> struct list_head *blocks)
> {
> LIST_HEAD(dfs);
> @@ -566,7 +576,62 @@ static int __drm_buddy_alloc_range(struct drm_buddy *mm,
> for (i = 0; i < mm->n_roots; ++i)
> list_add_tail(&mm->roots[i]->tmp_link, &dfs);
>
> - return __alloc_range(mm, &dfs, start, size, blocks);
> + return __alloc_range(mm, &dfs, start, size,
> + blocks, total_allocated_on_err);
> +}
> +
> +static int __alloc_contig_try_harder(struct drm_buddy *mm,
> + u64 size,
> + u64 min_block_size,
> + struct list_head *blocks)
> +{
> + u64 rhs_offset, lhs_offset, lhs_size, filled;
> + struct drm_buddy_block *block;
> + struct list_head *list;
> + LIST_HEAD(blocks_lhs);
> + unsigned long pages;
> + unsigned int order;
> + u64 modify_size;
> + int err;
> +
> + modify_size = rounddown_pow_of_two(size);
> + pages = modify_size >> ilog2(mm->chunk_size);
> + order = fls(pages) - 1;
> + if (order == 0)
> + return -ENOSPC;
> +
> + list = &mm->free_list[order];
> + if (list_empty(list))
> + return -ENOSPC;
> +
> + list_for_each_entry_reverse(block, list, link) {
> + /* Allocate blocks traversing RHS */
> + rhs_offset = drm_buddy_block_offset(block);
> + err = __drm_buddy_alloc_range(mm, rhs_offset, size,
> + &filled, blocks);
> + if (!err || err != -ENOSPC)
> + return err;
> +
> + lhs_size = max((size - filled), min_block_size);
> + if (!IS_ALIGNED(lhs_size, min_block_size))
> + lhs_size = round_up(lhs_size, min_block_size);
> +
> + /* Allocate blocks traversing LHS */
> + lhs_offset = drm_buddy_block_offset(block) - lhs_size;
> + err = __drm_buddy_alloc_range(mm, lhs_offset, lhs_size,
> + NULL, &blocks_lhs);
> + if (!err) {
> + list_splice(&blocks_lhs, blocks);
> + return 0;
I guess we could attempt to trim this also (could tweak the trim to work
on multiple nodes)? But I guess in practice should be pretty meh, given
that the extra rhs is hopefully not too big in the corner case where the
alignment doesn't fit the min_block_size?
Anyway, for patches 1-3,
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
> + } else if (err != -ENOSPC) {
> + drm_buddy_free_list(mm, blocks);
> + return err;
> + }
> + /* Free blocks for the next iteration */
> + drm_buddy_free_list(mm, blocks);
> + }
> +
> + return -ENOSPC;
> }
>
> /**
> @@ -626,7 +691,7 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
>
> new_start = drm_buddy_block_offset(block);
> list_add(&block->tmp_link, &dfs);
> - err = __alloc_range(mm, &dfs, new_start, new_size, blocks);
> + err = __alloc_range(mm, &dfs, new_start, new_size, blocks, NULL);
> if (err) {
> mark_allocated(block);
> mm->avail -= drm_buddy_block_size(mm, block);
> @@ -645,7 +710,7 @@ EXPORT_SYMBOL(drm_buddy_block_trim);
> * @start: start of the allowed range for this block
> * @end: end of the allowed range for this block
> * @size: size of the allocation
> - * @min_page_size: alignment of the allocation
> + * @min_block_size: alignment of the allocation
> * @blocks: output list head to add allocated blocks
> * @flags: DRM_BUDDY_*_ALLOCATION flags
> *
> @@ -660,23 +725,24 @@ EXPORT_SYMBOL(drm_buddy_block_trim);
> */
> int drm_buddy_alloc_blocks(struct drm_buddy *mm,
> u64 start, u64 end, u64 size,
> - u64 min_page_size,
> + u64 min_block_size,
> struct list_head *blocks,
> unsigned long flags)
> {
> struct drm_buddy_block *block = NULL;
> + u64 original_size, original_min_size;
> unsigned int min_order, order;
> - unsigned long pages;
> LIST_HEAD(allocated);
> + unsigned long pages;
> int err;
>
> if (size < mm->chunk_size)
> return -EINVAL;
>
> - if (min_page_size < mm->chunk_size)
> + if (min_block_size < mm->chunk_size)
> return -EINVAL;
>
> - if (!is_power_of_2(min_page_size))
> + if (!is_power_of_2(min_block_size))
> return -EINVAL;
>
> if (!IS_ALIGNED(start | end | size, mm->chunk_size))
> @@ -690,14 +756,23 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>
> /* Actual range allocation */
> if (start + size == end)
> - return __drm_buddy_alloc_range(mm, start, size, blocks);
> -
> - if (!IS_ALIGNED(size, min_page_size))
> - return -EINVAL;
> + return __drm_buddy_alloc_range(mm, start, size, NULL, blocks);
> +
> + original_size = size;
> + original_min_size = min_block_size;
> +
> + /* Roundup the size to power of 2 */
> + if (flags & DRM_BUDDY_CONTIGUOUS_ALLOCATION) {
> + size = roundup_pow_of_two(size);
> + min_block_size = size;
> + /* Align size value to min_block_size */
> + } else if (!IS_ALIGNED(size, min_block_size)) {
> + size = round_up(size, min_block_size);
> + }
>
> pages = size >> ilog2(mm->chunk_size);
> order = fls(pages) - 1;
> - min_order = ilog2(min_page_size) - ilog2(mm->chunk_size);
> + min_order = ilog2(min_block_size) - ilog2(mm->chunk_size);
>
> do {
> order = min(order, (unsigned int)fls(pages) - 1);
> @@ -716,6 +791,16 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
> break;
>
> if (order-- == min_order) {
> + if (flags & DRM_BUDDY_CONTIGUOUS_ALLOCATION &&
> + !(flags & DRM_BUDDY_RANGE_ALLOCATION))
> + /*
> + * Try contiguous block allocation through
> + * try harder method
> + */
> + return __alloc_contig_try_harder(mm,
> + original_size,
> + original_min_size,
> + blocks);
> err = -ENOSPC;
> goto err_free;
> }
> @@ -732,6 +817,31 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
> break;
> } while (1);
>
> + /* Trim the allocated block to the required size */
> + if (original_size != size) {
> + struct list_head *trim_list;
> + LIST_HEAD(temp);
> + u64 trim_size;
> +
> + trim_list = &allocated;
> + trim_size = original_size;
> +
> + if (!list_is_singular(&allocated)) {
> + block = list_last_entry(&allocated, typeof(*block), link);
> + list_move(&block->link, &temp);
> + trim_list = &temp;
> + trim_size = drm_buddy_block_size(mm, block) -
> + (size - original_size);
> + }
> +
> + drm_buddy_block_trim(mm,
> + trim_size,
> + trim_list);
> +
> + if (!list_empty(&temp))
> + list_splice_tail(trim_list, &allocated);
> + }
> +
> list_splice_tail(&allocated, blocks);
> return 0;
>
> diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
> index 572077ff8ae7..a5b39fc01003 100644
> --- a/include/drm/drm_buddy.h
> +++ b/include/drm/drm_buddy.h
> @@ -22,8 +22,9 @@
> start__ >= max__ || size__ > max__ - start__; \
> })
>
> -#define DRM_BUDDY_RANGE_ALLOCATION (1 << 0)
> -#define DRM_BUDDY_TOPDOWN_ALLOCATION (1 << 1)
> +#define DRM_BUDDY_RANGE_ALLOCATION BIT(0)
> +#define DRM_BUDDY_TOPDOWN_ALLOCATION BIT(1)
> +#define DRM_BUDDY_CONTIGUOUS_ALLOCATION BIT(2)
>
> struct drm_buddy_block {
> #define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12)
> @@ -155,5 +156,4 @@ void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p);
> void drm_buddy_block_print(struct drm_buddy *mm,
> struct drm_buddy_block *block,
> struct drm_printer *p);
> -
> #endif
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [Intel-gfx] [PATCH v2 1/3] drm/buddy: Improve contiguous memory allocation
2023-09-11 10:46 ` [Intel-gfx] [PATCH v2 1/3] " Matthew Auld
@ 2023-09-13 14:56 ` Arunpravin Paneer Selvam
0 siblings, 0 replies; 6+ messages in thread
From: Arunpravin Paneer Selvam @ 2023-09-13 14:56 UTC (permalink / raw)
To: Matthew Auld, dri-devel, amd-gfx, intel-gfx
Cc: alexander.deucher, christian.koenig
On 11/09/23 03:46, Matthew Auld wrote:
> On 09/09/2023 17:09, Arunpravin Paneer Selvam wrote:
>> Problem statement: The current method of allocating contiguous
>> memory rounds the size up with roundup_pow_of_two(), which
>> triggers -ENOSPC in some cases even though enough free space is
>> available. To help with that, we introduce a try-harder mechanism.
>>
>> In case of -ENOSPC, the new try-harder mechanism rounds the
>> original size down to a power of 2 and iterates over the freelist
>> blocks of that rounded-down size, traversing RHS and LHS to
>> allocate the required size.
>>
>> As part of implementing the above method, we moved the
>> contiguous/alignment size computation and the trim function
>> to the drm buddy file.
>>
>> v2: Modify the alloc_range() function to return total allocated size
>> on -ENOSPC err and traverse RHS/LHS to allocate the required
>> size (Matthew).
>>
>> Signed-off-by: Arunpravin Paneer Selvam
>> <Arunpravin.PaneerSelvam@amd.com>
>> ---
>> drivers/gpu/drm/drm_buddy.c | 138 ++++++++++++++++++++++++++++++++----
>> include/drm/drm_buddy.h | 6 +-
>> 2 files changed, 127 insertions(+), 17 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
>> index 7098f125b54a..e909eed9cf60 100644
>> --- a/drivers/gpu/drm/drm_buddy.c
>> +++ b/drivers/gpu/drm/drm_buddy.c
>> @@ -480,10 +480,12 @@ alloc_from_freelist(struct drm_buddy *mm,
>> static int __alloc_range(struct drm_buddy *mm,
>> struct list_head *dfs,
>> u64 start, u64 size,
>> - struct list_head *blocks)
>> + struct list_head *blocks,
>> + u64 *total_allocated_on_err)
>> {
>> struct drm_buddy_block *block;
>> struct drm_buddy_block *buddy;
>> + u64 total_allocated = 0;
>> LIST_HEAD(allocated);
>> u64 end;
>> int err;
>> @@ -520,6 +522,7 @@ static int __alloc_range(struct drm_buddy *mm,
>> }
>> mark_allocated(block);
>> + total_allocated += drm_buddy_block_size(mm, block);
>> mm->avail -= drm_buddy_block_size(mm, block);
>> list_add_tail(&block->link, &allocated);
>> continue;
>> @@ -551,13 +554,20 @@ static int __alloc_range(struct drm_buddy *mm,
>> __drm_buddy_free(mm, block);
>> err_free:
>> - drm_buddy_free_list(mm, &allocated);
>> + if (err == -ENOSPC && total_allocated_on_err) {
>> + list_splice_tail(&allocated, blocks);
>> + *total_allocated_on_err = total_allocated;
>> + } else {
>> + drm_buddy_free_list(mm, &allocated);
>> + }
>> +
>> return err;
>> }
>> static int __drm_buddy_alloc_range(struct drm_buddy *mm,
>> u64 start,
>> u64 size,
>> + u64 *total_allocated_on_err,
>> struct list_head *blocks)
>> {
>> LIST_HEAD(dfs);
>> @@ -566,7 +576,62 @@ static int __drm_buddy_alloc_range(struct
>> drm_buddy *mm,
>> for (i = 0; i < mm->n_roots; ++i)
>> list_add_tail(&mm->roots[i]->tmp_link, &dfs);
>> - return __alloc_range(mm, &dfs, start, size, blocks);
>> + return __alloc_range(mm, &dfs, start, size,
>> + blocks, total_allocated_on_err);
>> +}
>> +
>> +static int __alloc_contig_try_harder(struct drm_buddy *mm,
>> + u64 size,
>> + u64 min_block_size,
>> + struct list_head *blocks)
>> +{
>> + u64 rhs_offset, lhs_offset, lhs_size, filled;
>> + struct drm_buddy_block *block;
>> + struct list_head *list;
>> + LIST_HEAD(blocks_lhs);
>> + unsigned long pages;
>> + unsigned int order;
>> + u64 modify_size;
>> + int err;
>> +
>> + modify_size = rounddown_pow_of_two(size);
>> + pages = modify_size >> ilog2(mm->chunk_size);
>> + order = fls(pages) - 1;
>> + if (order == 0)
>> + return -ENOSPC;
>> +
>> + list = &mm->free_list[order];
>> + if (list_empty(list))
>> + return -ENOSPC;
>> +
>> + list_for_each_entry_reverse(block, list, link) {
>> + /* Allocate blocks traversing RHS */
>> + rhs_offset = drm_buddy_block_offset(block);
>> + err = __drm_buddy_alloc_range(mm, rhs_offset, size,
>> + &filled, blocks);
>> + if (!err || err != -ENOSPC)
>> + return err;
>> +
>> + lhs_size = max((size - filled), min_block_size);
>> + if (!IS_ALIGNED(lhs_size, min_block_size))
>> + lhs_size = round_up(lhs_size, min_block_size);
>> +
>> + /* Allocate blocks traversing LHS */
>> + lhs_offset = drm_buddy_block_offset(block) - lhs_size;
>> + err = __drm_buddy_alloc_range(mm, lhs_offset, lhs_size,
>> + NULL, &blocks_lhs);
>> + if (!err) {
>> + list_splice(&blocks_lhs, blocks);
>> + return 0;
>
> I guess we could attempt to trim this also (could tweak the trim to
> work on multiple nodes)? But I guess in practice it should be pretty meh,
> given that the extra rhs is hopefully not too big in the corner case
> where the alignment doesn't fit the min_block_size?
Thanks for the review. Good point, I will look into it.
Regards,
Arun.
>
> Anyway, for patches 1-3,
> Reviewed-by: Matthew Auld <matthew.auld@intel.com>
>
>> + } else if (err != -ENOSPC) {
>> + drm_buddy_free_list(mm, blocks);
>> + return err;
>> + }
>> + /* Free blocks for the next iteration */
>> + drm_buddy_free_list(mm, blocks);
>> + }
>> +
>> + return -ENOSPC;
>> }
>> /**
>> @@ -626,7 +691,7 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
>> new_start = drm_buddy_block_offset(block);
>> list_add(&block->tmp_link, &dfs);
>> - err = __alloc_range(mm, &dfs, new_start, new_size, blocks);
>> + err = __alloc_range(mm, &dfs, new_start, new_size, blocks, NULL);
>> if (err) {
>> mark_allocated(block);
>> mm->avail -= drm_buddy_block_size(mm, block);
>> @@ -645,7 +710,7 @@ EXPORT_SYMBOL(drm_buddy_block_trim);
>> * @start: start of the allowed range for this block
>> * @end: end of the allowed range for this block
>> * @size: size of the allocation
>> - * @min_page_size: alignment of the allocation
>> + * @min_block_size: alignment of the allocation
>> * @blocks: output list head to add allocated blocks
>> * @flags: DRM_BUDDY_*_ALLOCATION flags
>> *
>> @@ -660,23 +725,24 @@ EXPORT_SYMBOL(drm_buddy_block_trim);
>> */
>> int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>> u64 start, u64 end, u64 size,
>> - u64 min_page_size,
>> + u64 min_block_size,
>> struct list_head *blocks,
>> unsigned long flags)
>> {
>> struct drm_buddy_block *block = NULL;
>> + u64 original_size, original_min_size;
>> unsigned int min_order, order;
>> - unsigned long pages;
>> LIST_HEAD(allocated);
>> + unsigned long pages;
>> int err;
>> if (size < mm->chunk_size)
>> return -EINVAL;
>> - if (min_page_size < mm->chunk_size)
>> + if (min_block_size < mm->chunk_size)
>> return -EINVAL;
>> - if (!is_power_of_2(min_page_size))
>> + if (!is_power_of_2(min_block_size))
>> return -EINVAL;
>> if (!IS_ALIGNED(start | end | size, mm->chunk_size))
>> @@ -690,14 +756,23 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>> /* Actual range allocation */
>> if (start + size == end)
>> - return __drm_buddy_alloc_range(mm, start, size, blocks);
>> -
>> - if (!IS_ALIGNED(size, min_page_size))
>> - return -EINVAL;
>> + return __drm_buddy_alloc_range(mm, start, size, NULL, blocks);
>> +
>> + original_size = size;
>> + original_min_size = min_block_size;
>> +
>> + /* Roundup the size to power of 2 */
>> + if (flags & DRM_BUDDY_CONTIGUOUS_ALLOCATION) {
>> + size = roundup_pow_of_two(size);
>> + min_block_size = size;
>> + /* Align size value to min_block_size */
>> + } else if (!IS_ALIGNED(size, min_block_size)) {
>> + size = round_up(size, min_block_size);
>> + }
>> pages = size >> ilog2(mm->chunk_size);
>> order = fls(pages) - 1;
>> - min_order = ilog2(min_page_size) - ilog2(mm->chunk_size);
>> + min_order = ilog2(min_block_size) - ilog2(mm->chunk_size);
>> do {
>> order = min(order, (unsigned int)fls(pages) - 1);
>> @@ -716,6 +791,16 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>> break;
>> if (order-- == min_order) {
>> + if (flags & DRM_BUDDY_CONTIGUOUS_ALLOCATION &&
>> + !(flags & DRM_BUDDY_RANGE_ALLOCATION))
>> + /*
>> + * Try contiguous block allocation through
>> + * try harder method
>> + */
>> + return __alloc_contig_try_harder(mm,
>> + original_size,
>> + original_min_size,
>> + blocks);
>> err = -ENOSPC;
>> goto err_free;
>> }
>> @@ -732,6 +817,31 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>> break;
>> } while (1);
>> + /* Trim the allocated block to the required size */
>> + if (original_size != size) {
>> + struct list_head *trim_list;
>> + LIST_HEAD(temp);
>> + u64 trim_size;
>> +
>> + trim_list = &allocated;
>> + trim_size = original_size;
>> +
>> + if (!list_is_singular(&allocated)) {
>> + block = list_last_entry(&allocated, typeof(*block), link);
>> + list_move(&block->link, &temp);
>> + trim_list = &temp;
>> + trim_size = drm_buddy_block_size(mm, block) -
>> + (size - original_size);
>> + }
>> +
>> + drm_buddy_block_trim(mm,
>> + trim_size,
>> + trim_list);
>> +
>> + if (!list_empty(&temp))
>> + list_splice_tail(trim_list, &allocated);
>> + }
>> +
>> list_splice_tail(&allocated, blocks);
>> return 0;
>> diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
>> index 572077ff8ae7..a5b39fc01003 100644
>> --- a/include/drm/drm_buddy.h
>> +++ b/include/drm/drm_buddy.h
>> @@ -22,8 +22,9 @@
>> start__ >= max__ || size__ > max__ - start__; \
>> })
>> -#define DRM_BUDDY_RANGE_ALLOCATION (1 << 0)
>> -#define DRM_BUDDY_TOPDOWN_ALLOCATION (1 << 1)
>> +#define DRM_BUDDY_RANGE_ALLOCATION BIT(0)
>> +#define DRM_BUDDY_TOPDOWN_ALLOCATION BIT(1)
>> +#define DRM_BUDDY_CONTIGUOUS_ALLOCATION BIT(2)
>> struct drm_buddy_block {
>> #define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12)
>> @@ -155,5 +156,4 @@ void drm_buddy_print(struct drm_buddy *mm, struct
>> drm_printer *p);
>> void drm_buddy_block_print(struct drm_buddy *mm,
>> struct drm_buddy_block *block,
>> struct drm_printer *p);
>> -
>> #endif
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2023-09-13 14:56 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-09-09 16:09 [Intel-gfx] [PATCH v2 1/3] drm/buddy: Improve contiguous memory allocation Arunpravin Paneer Selvam
2023-09-09 16:09 ` [Intel-gfx] [PATCH v2 2/3] drm/amdgpu: Move the size computations to drm buddy Arunpravin Paneer Selvam
2023-09-09 16:09 ` [Intel-gfx] [PATCH v2 3/3] drm/i915: " Arunpravin Paneer Selvam
2023-09-09 16:37 ` [Intel-gfx] ✗ Fi.CI.SPARSE: warning for series starting with [v2,1/3] drm/buddy: Improve contiguous memory allocation Patchwork
2023-09-11 10:46 ` [Intel-gfx] [PATCH v2 1/3] " Matthew Auld
2023-09-13 14:56 ` Arunpravin Paneer Selvam
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox