* [Intel-xe] [PATCH 2/3] drm/amd: Convert amdgpu to use suballocation helper.
2023-02-16 14:48 [Intel-xe] [PATCH 0/3] drm, drm/amd, drm/radeon: Introduce a generic suballocator Thomas Hellström
@ 2023-02-16 14:48 ` Thomas Hellström
0 siblings, 0 replies; 12+ messages in thread
From: Thomas Hellström @ 2023-02-16 14:48 UTC (permalink / raw)
To: dri-devel
Cc: Daniel Vetter, Maarten Lankhorst, Christian Koenig, Dave Airlie,
intel-xe
From: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Now that we have a generic suballocation helper, use it in amdgpu.
The debug output is slightly different and suballocation may be
slightly more cpu-hungry.
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Co-developed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
drivers/gpu/drm/Kconfig | 1 +
drivers/gpu/drm/amd/amdgpu/Kconfig | 1 +
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 26 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 5 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 23 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 3 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c | 320 ++-------------------
7 files changed, 43 insertions(+), 336 deletions(-)
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 8fbe57407c60..73ddfdf3a894 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -77,6 +77,7 @@ config DRM_KUNIT_TEST
select DRM_DISPLAY_HELPER
select DRM_LIB_RANDOM
select DRM_KMS_HELPER
+ select DRM_SUBALLOC_HELPER
select DRM_BUDDY
select DRM_EXPORT_FOR_TESTS if m
select DRM_KUNIT_TEST_HELPERS
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig
index 5341b6b242c3..0ed12171450b 100644
--- a/drivers/gpu/drm/amd/amdgpu/Kconfig
+++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
@@ -18,6 +18,7 @@ config DRM_AMDGPU
select BACKLIGHT_CLASS_DEVICE
select INTERVAL_TREE
select DRM_BUDDY
+ select DRM_SUBALLOC_HELPER
# amdgpu depends on ACPI_VIDEO when ACPI is enabled, for select to work
# ACPI_VIDEO's dependencies must also be selected.
select INPUT if ACPI
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 164141bc8b4a..dda88090f044 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -424,29 +424,11 @@ struct amdgpu_clock {
* alignment).
*/
-#define AMDGPU_SA_NUM_FENCE_LISTS 32
-
struct amdgpu_sa_manager {
- wait_queue_head_t wq;
- struct amdgpu_bo *bo;
- struct list_head *hole;
- struct list_head flist[AMDGPU_SA_NUM_FENCE_LISTS];
- struct list_head olist;
- unsigned size;
- uint64_t gpu_addr;
- void *cpu_ptr;
- uint32_t domain;
- uint32_t align;
-};
-
-/* sub-allocation buffer */
-struct amdgpu_sa_bo {
- struct list_head olist;
- struct list_head flist;
- struct amdgpu_sa_manager *manager;
- unsigned soffset;
- unsigned eoffset;
- struct dma_fence *fence;
+ struct drm_suballoc_manager base;
+ struct amdgpu_bo *bo;
+ uint64_t gpu_addr;
+ void *cpu_ptr;
};
int amdgpu_fence_slab_init(void);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index bcccc348dbe2..5621b63c7f42 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -69,7 +69,7 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
if (size) {
r = amdgpu_sa_bo_new(&adev->ib_pools[pool_type],
- &ib->sa_bo, size, 256);
+ &ib->sa_bo, size);
if (r) {
dev_err(adev->dev, "failed to get a new IB (%d)\n", r);
return r;
@@ -309,8 +309,7 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev)
for (i = 0; i < AMDGPU_IB_POOL_MAX; i++) {
r = amdgpu_sa_bo_manager_init(adev, &adev->ib_pools[i],
- AMDGPU_IB_POOL_SIZE,
- AMDGPU_GPU_PAGE_SIZE,
+ AMDGPU_IB_POOL_SIZE, 256,
AMDGPU_GEM_DOMAIN_GTT);
if (r)
goto error;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 93207badf83f..568baf15d5b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -336,15 +336,22 @@ uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev,
/*
* sub allocation
*/
+static inline struct amdgpu_sa_manager *
+to_amdgpu_sa_manager(struct drm_suballoc_manager *manager)
+{
+ return container_of(manager, struct amdgpu_sa_manager, base);
+}
-static inline uint64_t amdgpu_sa_bo_gpu_addr(struct amdgpu_sa_bo *sa_bo)
+static inline uint64_t amdgpu_sa_bo_gpu_addr(struct drm_suballoc *sa_bo)
{
- return sa_bo->manager->gpu_addr + sa_bo->soffset;
+ return to_amdgpu_sa_manager(sa_bo->manager)->gpu_addr +
+ drm_suballoc_soffset(sa_bo);
}
-static inline void * amdgpu_sa_bo_cpu_addr(struct amdgpu_sa_bo *sa_bo)
+static inline void * amdgpu_sa_bo_cpu_addr(struct drm_suballoc *sa_bo)
{
- return sa_bo->manager->cpu_ptr + sa_bo->soffset;
+ return to_amdgpu_sa_manager(sa_bo->manager)->cpu_ptr +
+ drm_suballoc_soffset(sa_bo);
}
int amdgpu_sa_bo_manager_init(struct amdgpu_device *adev,
@@ -355,11 +362,11 @@ void amdgpu_sa_bo_manager_fini(struct amdgpu_device *adev,
int amdgpu_sa_bo_manager_start(struct amdgpu_device *adev,
struct amdgpu_sa_manager *sa_manager);
int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
- struct amdgpu_sa_bo **sa_bo,
- unsigned size, unsigned align);
+ struct drm_suballoc **sa_bo,
+ unsigned size);
void amdgpu_sa_bo_free(struct amdgpu_device *adev,
- struct amdgpu_sa_bo **sa_bo,
- struct dma_fence *fence);
+ struct drm_suballoc **sa_bo,
+ struct dma_fence *fence);
#if defined(CONFIG_DEBUG_FS)
void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager,
struct seq_file *m);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 3989e755a5b4..018f36b10de8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -27,6 +27,7 @@
#include <drm/amdgpu_drm.h>
#include <drm/gpu_scheduler.h>
#include <drm/drm_print.h>
+#include <drm/drm_suballoc.h>
struct amdgpu_device;
struct amdgpu_ring;
@@ -92,7 +93,7 @@ enum amdgpu_ib_pool_type {
};
struct amdgpu_ib {
- struct amdgpu_sa_bo *sa_bo;
+ struct drm_suballoc *sa_bo;
uint32_t length_dw;
uint64_t gpu_addr;
uint32_t *ptr;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
index 524d10b21041..e7b3539e0294 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
@@ -44,327 +44,61 @@
#include "amdgpu.h"
-static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo);
-static void amdgpu_sa_bo_try_free(struct amdgpu_sa_manager *sa_manager);
-
int amdgpu_sa_bo_manager_init(struct amdgpu_device *adev,
struct amdgpu_sa_manager *sa_manager,
- unsigned size, u32 align, u32 domain)
+ unsigned size, u32 suballoc_align, u32 domain)
{
- int i, r;
-
- init_waitqueue_head(&sa_manager->wq);
- sa_manager->bo = NULL;
- sa_manager->size = size;
- sa_manager->domain = domain;
- sa_manager->align = align;
- sa_manager->hole = &sa_manager->olist;
- INIT_LIST_HEAD(&sa_manager->olist);
- for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
- INIT_LIST_HEAD(&sa_manager->flist[i]);
+ int r;
- r = amdgpu_bo_create_kernel(adev, size, align, domain, &sa_manager->bo,
+ r = amdgpu_bo_create_kernel(adev, size, AMDGPU_GPU_PAGE_SIZE, domain, &sa_manager->bo,
&sa_manager->gpu_addr, &sa_manager->cpu_ptr);
if (r) {
dev_err(adev->dev, "(%d) failed to allocate bo for manager\n", r);
return r;
}
- memset(sa_manager->cpu_ptr, 0, sa_manager->size);
+ memset(sa_manager->cpu_ptr, 0, size);
+ drm_suballoc_manager_init(&sa_manager->base, size, suballoc_align);
return r;
}
void amdgpu_sa_bo_manager_fini(struct amdgpu_device *adev,
struct amdgpu_sa_manager *sa_manager)
{
- struct amdgpu_sa_bo *sa_bo, *tmp;
-
if (sa_manager->bo == NULL) {
dev_err(adev->dev, "no bo for sa manager\n");
return;
}
- if (!list_empty(&sa_manager->olist)) {
- sa_manager->hole = &sa_manager->olist,
- amdgpu_sa_bo_try_free(sa_manager);
- if (!list_empty(&sa_manager->olist)) {
- dev_err(adev->dev, "sa_manager is not empty, clearing anyway\n");
- }
- }
- list_for_each_entry_safe(sa_bo, tmp, &sa_manager->olist, olist) {
- amdgpu_sa_bo_remove_locked(sa_bo);
- }
+ drm_suballoc_manager_fini(&sa_manager->base);
amdgpu_bo_free_kernel(&sa_manager->bo, &sa_manager->gpu_addr, &sa_manager->cpu_ptr);
- sa_manager->size = 0;
}
-static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo)
-{
- struct amdgpu_sa_manager *sa_manager = sa_bo->manager;
- if (sa_manager->hole == &sa_bo->olist) {
- sa_manager->hole = sa_bo->olist.prev;
- }
- list_del_init(&sa_bo->olist);
- list_del_init(&sa_bo->flist);
- dma_fence_put(sa_bo->fence);
- kfree(sa_bo);
-}
-
-static void amdgpu_sa_bo_try_free(struct amdgpu_sa_manager *sa_manager)
+int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
+ struct drm_suballoc **sa_bo,
+ unsigned size)
{
- struct amdgpu_sa_bo *sa_bo, *tmp;
+ struct drm_suballoc *sa = drm_suballoc_new(&sa_manager->base, size, GFP_KERNEL, true);
- if (sa_manager->hole->next == &sa_manager->olist)
- return;
+ if (IS_ERR(sa)) {
+ *sa_bo = NULL;
- sa_bo = list_entry(sa_manager->hole->next, struct amdgpu_sa_bo, olist);
- list_for_each_entry_safe_from(sa_bo, tmp, &sa_manager->olist, olist) {
- if (sa_bo->fence == NULL ||
- !dma_fence_is_signaled(sa_bo->fence)) {
- return;
- }
- amdgpu_sa_bo_remove_locked(sa_bo);
+ return PTR_ERR(sa);
}
-}
-static inline unsigned amdgpu_sa_bo_hole_soffset(struct amdgpu_sa_manager *sa_manager)
-{
- struct list_head *hole = sa_manager->hole;
-
- if (hole != &sa_manager->olist) {
- return list_entry(hole, struct amdgpu_sa_bo, olist)->eoffset;
- }
+ *sa_bo = sa;
return 0;
}
-static inline unsigned amdgpu_sa_bo_hole_eoffset(struct amdgpu_sa_manager *sa_manager)
-{
- struct list_head *hole = sa_manager->hole;
-
- if (hole->next != &sa_manager->olist) {
- return list_entry(hole->next, struct amdgpu_sa_bo, olist)->soffset;
- }
- return sa_manager->size;
-}
-
-static bool amdgpu_sa_bo_try_alloc(struct amdgpu_sa_manager *sa_manager,
- struct amdgpu_sa_bo *sa_bo,
- unsigned size, unsigned align)
-{
- unsigned soffset, eoffset, wasted;
-
- soffset = amdgpu_sa_bo_hole_soffset(sa_manager);
- eoffset = amdgpu_sa_bo_hole_eoffset(sa_manager);
- wasted = (align - (soffset % align)) % align;
-
- if ((eoffset - soffset) >= (size + wasted)) {
- soffset += wasted;
-
- sa_bo->manager = sa_manager;
- sa_bo->soffset = soffset;
- sa_bo->eoffset = soffset + size;
- list_add(&sa_bo->olist, sa_manager->hole);
- INIT_LIST_HEAD(&sa_bo->flist);
- sa_manager->hole = &sa_bo->olist;
- return true;
- }
- return false;
-}
-
-/**
- * amdgpu_sa_event - Check if we can stop waiting
- *
- * @sa_manager: pointer to the sa_manager
- * @size: number of bytes we want to allocate
- * @align: alignment we need to match
- *
- * Check if either there is a fence we can wait for or
- * enough free memory to satisfy the allocation directly
- */
-static bool amdgpu_sa_event(struct amdgpu_sa_manager *sa_manager,
- unsigned size, unsigned align)
-{
- unsigned soffset, eoffset, wasted;
- int i;
-
- for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
- if (!list_empty(&sa_manager->flist[i]))
- return true;
-
- soffset = amdgpu_sa_bo_hole_soffset(sa_manager);
- eoffset = amdgpu_sa_bo_hole_eoffset(sa_manager);
- wasted = (align - (soffset % align)) % align;
-
- if ((eoffset - soffset) >= (size + wasted)) {
- return true;
- }
-
- return false;
-}
-
-static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager,
- struct dma_fence **fences,
- unsigned *tries)
-{
- struct amdgpu_sa_bo *best_bo = NULL;
- unsigned i, soffset, best, tmp;
-
- /* if hole points to the end of the buffer */
- if (sa_manager->hole->next == &sa_manager->olist) {
- /* try again with its beginning */
- sa_manager->hole = &sa_manager->olist;
- return true;
- }
-
- soffset = amdgpu_sa_bo_hole_soffset(sa_manager);
- /* to handle wrap around we add sa_manager->size */
- best = sa_manager->size * 2;
- /* go over all fence list and try to find the closest sa_bo
- * of the current last
- */
- for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i) {
- struct amdgpu_sa_bo *sa_bo;
-
- fences[i] = NULL;
-
- if (list_empty(&sa_manager->flist[i]))
- continue;
-
- sa_bo = list_first_entry(&sa_manager->flist[i],
- struct amdgpu_sa_bo, flist);
-
- if (!dma_fence_is_signaled(sa_bo->fence)) {
- fences[i] = sa_bo->fence;
- continue;
- }
-
- /* limit the number of tries each ring gets */
- if (tries[i] > 2) {
- continue;
- }
-
- tmp = sa_bo->soffset;
- if (tmp < soffset) {
- /* wrap around, pretend it's after */
- tmp += sa_manager->size;
- }
- tmp -= soffset;
- if (tmp < best) {
- /* this sa bo is the closest one */
- best = tmp;
- best_bo = sa_bo;
- }
- }
-
- if (best_bo) {
- uint32_t idx = best_bo->fence->context;
-
- idx %= AMDGPU_SA_NUM_FENCE_LISTS;
- ++tries[idx];
- sa_manager->hole = best_bo->olist.prev;
-
- /* we knew that this one is signaled,
- so it's save to remote it */
- amdgpu_sa_bo_remove_locked(best_bo);
- return true;
- }
- return false;
-}
-
-int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
- struct amdgpu_sa_bo **sa_bo,
- unsigned size, unsigned align)
-{
- struct dma_fence *fences[AMDGPU_SA_NUM_FENCE_LISTS];
- unsigned tries[AMDGPU_SA_NUM_FENCE_LISTS];
- unsigned count;
- int i, r;
- signed long t;
-
- if (WARN_ON_ONCE(align > sa_manager->align))
- return -EINVAL;
-
- if (WARN_ON_ONCE(size > sa_manager->size))
- return -EINVAL;
-
- *sa_bo = kmalloc(sizeof(struct amdgpu_sa_bo), GFP_KERNEL);
- if (!(*sa_bo))
- return -ENOMEM;
- (*sa_bo)->manager = sa_manager;
- (*sa_bo)->fence = NULL;
- INIT_LIST_HEAD(&(*sa_bo)->olist);
- INIT_LIST_HEAD(&(*sa_bo)->flist);
-
- spin_lock(&sa_manager->wq.lock);
- do {
- for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
- tries[i] = 0;
-
- do {
- amdgpu_sa_bo_try_free(sa_manager);
-
- if (amdgpu_sa_bo_try_alloc(sa_manager, *sa_bo,
- size, align)) {
- spin_unlock(&sa_manager->wq.lock);
- return 0;
- }
-
- /* see if we can skip over some allocations */
- } while (amdgpu_sa_bo_next_hole(sa_manager, fences, tries));
-
- for (i = 0, count = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
- if (fences[i])
- fences[count++] = dma_fence_get(fences[i]);
-
- if (count) {
- spin_unlock(&sa_manager->wq.lock);
- t = dma_fence_wait_any_timeout(fences, count, false,
- MAX_SCHEDULE_TIMEOUT,
- NULL);
- for (i = 0; i < count; ++i)
- dma_fence_put(fences[i]);
-
- r = (t > 0) ? 0 : t;
- spin_lock(&sa_manager->wq.lock);
- } else {
- /* if we have nothing to wait for block */
- r = wait_event_interruptible_locked(
- sa_manager->wq,
- amdgpu_sa_event(sa_manager, size, align)
- );
- }
-
- } while (!r);
-
- spin_unlock(&sa_manager->wq.lock);
- kfree(*sa_bo);
- *sa_bo = NULL;
- return r;
-}
-
-void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo,
+void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct drm_suballoc **sa_bo,
struct dma_fence *fence)
{
- struct amdgpu_sa_manager *sa_manager;
-
if (sa_bo == NULL || *sa_bo == NULL) {
return;
}
- sa_manager = (*sa_bo)->manager;
- spin_lock(&sa_manager->wq.lock);
- if (fence && !dma_fence_is_signaled(fence)) {
- uint32_t idx;
-
- (*sa_bo)->fence = dma_fence_get(fence);
- idx = fence->context % AMDGPU_SA_NUM_FENCE_LISTS;
- list_add_tail(&(*sa_bo)->flist, &sa_manager->flist[idx]);
- } else {
- amdgpu_sa_bo_remove_locked(*sa_bo);
- }
- wake_up_all_locked(&sa_manager->wq);
- spin_unlock(&sa_manager->wq.lock);
+ drm_suballoc_free(*sa_bo, fence);
*sa_bo = NULL;
}
@@ -373,26 +107,8 @@ void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo,
void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager,
struct seq_file *m)
{
- struct amdgpu_sa_bo *i;
-
- spin_lock(&sa_manager->wq.lock);
- list_for_each_entry(i, &sa_manager->olist, olist) {
- uint64_t soffset = i->soffset + sa_manager->gpu_addr;
- uint64_t eoffset = i->eoffset + sa_manager->gpu_addr;
- if (&i->olist == sa_manager->hole) {
- seq_printf(m, ">");
- } else {
- seq_printf(m, " ");
- }
- seq_printf(m, "[0x%010llx 0x%010llx] size %8lld",
- soffset, eoffset, eoffset - soffset);
+ struct drm_printer p = drm_seq_file_printer(m);
- if (i->fence)
- seq_printf(m, " protected by 0x%016llx on context %llu",
- i->fence->seqno, i->fence->context);
-
- seq_printf(m, "\n");
- }
- spin_unlock(&sa_manager->wq.lock);
+ drm_suballoc_dump_debug_info(&sa_manager->base, &p, sa_manager->gpu_addr);
}
#endif
--
2.34.1
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [Intel-xe] [PATCH 0/3] drm/helpers: Make the suballocation manager drm generic
@ 2023-02-23 10:57 Thomas Hellström
2023-02-23 10:57 ` [Intel-xe] [PATCH 1/3] drm/suballoc: Extract amdgpu_sa.c as generic suballocation helper Thomas Hellström
` (2 more replies)
0 siblings, 3 replies; 12+ messages in thread
From: Thomas Hellström @ 2023-02-23 10:57 UTC (permalink / raw)
To: dri-devel
Cc: Daniel Vetter, Maarten Lankhorst, Christian Koenig, Dave Airlie,
intel-xe
This series (or at least the suballocator helper) is a prerequisite
for the new Xe driver.
There was an unresolved issue when the series was last up for review,
and that was the per-allocation alignment. Last message was from
Maarten Lankhorst arguing that the larger per-driver alignment used
would only incur a small memory cost. This new variant resolves that.
The generic suballocator has been tested with the Xe driver, and a
kunit test is under development.
The amd- and radeon adaptations are only compile-tested.
Maarten Lankhorst (3):
drm/suballoc: Extract amdgpu_sa.c as generic suballocation helper
drm/amd: Convert amdgpu to use suballocation helper.
drm/radeon: Use the drm suballocation manager implementation.
drivers/gpu/drm/Kconfig | 5 +
drivers/gpu/drm/Makefile | 3 +
drivers/gpu/drm/amd/amdgpu/Kconfig | 1 +
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 26 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 5 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 23 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 3 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c | 324 +--------------
drivers/gpu/drm/drm_suballoc.c | 457 +++++++++++++++++++++
drivers/gpu/drm/radeon/radeon.h | 55 +--
drivers/gpu/drm/radeon/radeon_ib.c | 12 +-
drivers/gpu/drm/radeon/radeon_object.h | 25 +-
drivers/gpu/drm/radeon/radeon_sa.c | 316 ++------------
drivers/gpu/drm/radeon/radeon_semaphore.c | 4 +-
include/drm/drm_suballoc.h | 106 +++++
15 files changed, 672 insertions(+), 693 deletions(-)
create mode 100644 drivers/gpu/drm/drm_suballoc.c
create mode 100644 include/drm/drm_suballoc.h
--
2.34.1
^ permalink raw reply [flat|nested] 12+ messages in thread
* [Intel-xe] [PATCH 1/3] drm/suballoc: Extract amdgpu_sa.c as generic suballocation helper
2023-02-23 10:57 [Intel-xe] [PATCH 0/3] drm/helpers: Make the suballocation manager drm generic Thomas Hellström
@ 2023-02-23 10:57 ` Thomas Hellström
2023-02-23 11:13 ` Christian König
2023-02-23 10:57 ` [Intel-xe] [PATCH 2/3] drm/amd: Convert amdgpu to use " Thomas Hellström
2023-02-23 10:57 ` [Intel-xe] [PATCH 3/3] drm/radeon: Use the drm suballocation manager implementation Thomas Hellström
2 siblings, 1 reply; 12+ messages in thread
From: Thomas Hellström @ 2023-02-23 10:57 UTC (permalink / raw)
To: dri-devel
Cc: Daniel Vetter, Maarten Lankhorst, Christian Koenig, Dave Airlie,
intel-xe
From: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Suballocating a buffer object is something that is not driver-specific
and useful for many drivers.
Use a slightly modified version of amdgpu_sa.c
v2:
- Style cleanups. (Thomas)
- Added / Modified documentation (Thomas)
- Use u64 for the sizes and offset. The code dates back to 2012 and
using unsigned int will probably soon come back to bite us.
We can consider size_t as well for better 32-bit efficiency. (Thomas)
- Add and document gfp, intr and align arguments to drm_suballoc_new()
(Thomas)
- Use drm_printer for debug output (Thomas)
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Co-developed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
drivers/gpu/drm/Kconfig | 4 +
drivers/gpu/drm/Makefile | 3 +
drivers/gpu/drm/drm_suballoc.c | 457 +++++++++++++++++++++++++++++++++
include/drm/drm_suballoc.h | 106 ++++++++
4 files changed, 570 insertions(+)
create mode 100644 drivers/gpu/drm/drm_suballoc.c
create mode 100644 include/drm/drm_suballoc.h
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index dc0f94f02a82..8fbe57407c60 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -232,6 +232,10 @@ config DRM_GEM_SHMEM_HELPER
help
Choose this if you need the GEM shmem helper functions
+config DRM_SUBALLOC_HELPER
+ tristate
+ depends on DRM
+
config DRM_SCHED
tristate
depends on DRM
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index ab4460fcd63f..1e04d135e866 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -88,6 +88,9 @@ obj-$(CONFIG_DRM_GEM_DMA_HELPER) += drm_dma_helper.o
drm_shmem_helper-y := drm_gem_shmem_helper.o
obj-$(CONFIG_DRM_GEM_SHMEM_HELPER) += drm_shmem_helper.o
+drm_suballoc_helper-y := drm_suballoc.o
+obj-$(CONFIG_DRM_SUBALLOC_HELPER) += drm_suballoc_helper.o
+
drm_vram_helper-y := drm_gem_vram_helper.o
obj-$(CONFIG_DRM_VRAM_HELPER) += drm_vram_helper.o
diff --git a/drivers/gpu/drm/drm_suballoc.c b/drivers/gpu/drm/drm_suballoc.c
new file mode 100644
index 000000000000..057cd19c44ba
--- /dev/null
+++ b/drivers/gpu/drm/drm_suballoc.c
@@ -0,0 +1,457 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2011 Red Hat Inc.
+ * Copyright 2023 Intel Corporation.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+/*
+ * Authors:
+ * Jerome Glisse <glisse@freedesktop.org>
+ */
+/* Algorithm:
+ *
+ * We store the last allocated bo in "hole", we always try to allocate
+ * after the last allocated bo. Principle is that in a linear GPU ring
+ * progression what is after last is the oldest bo we allocated and thus
+ * the first one that should no longer be in use by the GPU.
+ *
+ * If it's not the case we skip over the bo after last to the closest
+ * done bo if such a one exists. If none exists and we are not asked to
+ * block we report failure to allocate.
+ *
+ * If we are asked to block we collect the oldest fence of each
+ * ring and wait for any one of those fences to complete.
+ */
+
+#include <drm/drm_suballoc.h>
+#include <drm/drm_print.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/dma-fence.h>
+
+static void drm_suballoc_remove_locked(struct drm_suballoc *sa);
+static void drm_suballoc_try_free(struct drm_suballoc_manager *sa_manager);
+
+/**
+ * drm_suballoc_manager_init() - Initialise the drm_suballoc_manager
+ * @sa_manager: pointer to the sa_manager
+ * @size: number of bytes we want to suballocate
+ * @align: alignment for each suballocated chunk
+ *
+ * Prepares the suballocation manager for suballocations.
+ */
+void drm_suballoc_manager_init(struct drm_suballoc_manager *sa_manager,
+ u64 size, u64 align)
+{
+ unsigned int i;
+
+ if (!align)
+ align = 1;
+
+ /* alignment must be a power of 2 */
+ if (WARN_ON_ONCE(align & (align - 1)))
+ align = roundup_pow_of_two(align);
+
+ init_waitqueue_head(&sa_manager->wq);
+ sa_manager->size = size;
+ sa_manager->align = align;
+ sa_manager->hole = &sa_manager->olist;
+ INIT_LIST_HEAD(&sa_manager->olist);
+ for (i = 0; i < DRM_SUBALLOC_MAX_QUEUES; ++i)
+ INIT_LIST_HEAD(&sa_manager->flist[i]);
+}
+EXPORT_SYMBOL(drm_suballoc_manager_init);
+
+/**
+ * drm_suballoc_manager_fini() - Destroy the drm_suballoc_manager
+ * @sa_manager: pointer to the sa_manager
+ *
+ * Cleans up the suballocation manager after use. All fences added
+ * with drm_suballoc_free() must be signaled, or we cannot clean up
+ * the entire manager.
+ */
+void drm_suballoc_manager_fini(struct drm_suballoc_manager *sa_manager)
+{
+ struct drm_suballoc *sa, *tmp;
+
+ if (!sa_manager->size)
+ return;
+
+ if (!list_empty(&sa_manager->olist)) {
+ sa_manager->hole = &sa_manager->olist;
+ drm_suballoc_try_free(sa_manager);
+ if (!list_empty(&sa_manager->olist))
+ DRM_ERROR("sa_manager is not empty, clearing anyway\n");
+ }
+ list_for_each_entry_safe(sa, tmp, &sa_manager->olist, olist) {
+ drm_suballoc_remove_locked(sa);
+ }
+
+ sa_manager->size = 0;
+}
+EXPORT_SYMBOL(drm_suballoc_manager_fini);
+
+static void drm_suballoc_remove_locked(struct drm_suballoc *sa)
+{
+ struct drm_suballoc_manager *sa_manager = sa->manager;
+
+ if (sa_manager->hole == &sa->olist)
+ sa_manager->hole = sa->olist.prev;
+
+ list_del_init(&sa->olist);
+ list_del_init(&sa->flist);
+ dma_fence_put(sa->fence);
+ kfree(sa);
+}
+
+static void drm_suballoc_try_free(struct drm_suballoc_manager *sa_manager)
+{
+ struct drm_suballoc *sa, *tmp;
+
+ if (sa_manager->hole->next == &sa_manager->olist)
+ return;
+
+ sa = list_entry(sa_manager->hole->next, struct drm_suballoc, olist);
+ list_for_each_entry_safe_from(sa, tmp, &sa_manager->olist, olist) {
+ if (!sa->fence || !dma_fence_is_signaled(sa->fence))
+ return;
+
+ drm_suballoc_remove_locked(sa);
+ }
+}
+
+static u64 drm_suballoc_hole_soffset(struct drm_suballoc_manager *sa_manager)
+{
+ struct list_head *hole = sa_manager->hole;
+
+ if (hole != &sa_manager->olist)
+ return list_entry(hole, struct drm_suballoc, olist)->eoffset;
+
+ return 0;
+}
+
+static u64 drm_suballoc_hole_eoffset(struct drm_suballoc_manager *sa_manager)
+{
+ struct list_head *hole = sa_manager->hole;
+
+ if (hole->next != &sa_manager->olist)
+ return list_entry(hole->next, struct drm_suballoc, olist)->soffset;
+ return sa_manager->size;
+}
+
+static bool drm_suballoc_try_alloc(struct drm_suballoc_manager *sa_manager,
+ struct drm_suballoc *sa,
+ u64 size, u64 align)
+{
+ u64 soffset, eoffset, wasted;
+
+ soffset = drm_suballoc_hole_soffset(sa_manager);
+ eoffset = drm_suballoc_hole_eoffset(sa_manager);
+ wasted = (align - (soffset % align)) % align;
+
+ if ((eoffset - soffset) >= (size + wasted)) {
+ soffset += wasted;
+
+ sa->manager = sa_manager;
+ sa->soffset = soffset;
+ sa->eoffset = soffset + size;
+ list_add(&sa->olist, sa_manager->hole);
+ INIT_LIST_HEAD(&sa->flist);
+ sa_manager->hole = &sa->olist;
+ return true;
+ }
+ return false;
+}
+
+static bool __drm_suballoc_event(struct drm_suballoc_manager *sa_manager,
+ u64 size, u64 align)
+{
+ u64 soffset, eoffset, wasted;
+ unsigned int i;
+
+ for (i = 0; i < DRM_SUBALLOC_MAX_QUEUES; ++i)
+ if (!list_empty(&sa_manager->flist[i]))
+ return true;
+
+ soffset = drm_suballoc_hole_soffset(sa_manager);
+ eoffset = drm_suballoc_hole_eoffset(sa_manager);
+ wasted = (align - (soffset % align)) % align;
+
+ return ((eoffset - soffset) >= (size + wasted));
+}
+
+/**
+ * drm_suballoc_event() - Check if we can stop waiting
+ * @sa_manager: pointer to the sa_manager
+ * @size: number of bytes we want to allocate
+ * @align: alignment we need to match
+ *
+ * Return: true if either there is a fence we can wait for or
+ * enough free memory to satisfy the allocation directly.
+ * false otherwise.
+ */
+static bool drm_suballoc_event(struct drm_suballoc_manager *sa_manager,
+ u64 size, u64 align)
+{
+ bool ret;
+
+ spin_lock(&sa_manager->wq.lock);
+ ret = __drm_suballoc_event(sa_manager, size, align);
+ spin_unlock(&sa_manager->wq.lock);
+ return ret;
+}
+
+static bool drm_suballoc_next_hole(struct drm_suballoc_manager *sa_manager,
+ struct dma_fence **fences,
+ unsigned int *tries)
+{
+ struct drm_suballoc *best_bo = NULL;
+ unsigned int i, best_idx;
+ u64 soffset, best, tmp;
+
+ /* if hole points to the end of the buffer */
+ if (sa_manager->hole->next == &sa_manager->olist) {
+ /* try again with its beginning */
+ sa_manager->hole = &sa_manager->olist;
+ return true;
+ }
+
+ soffset = drm_suballoc_hole_soffset(sa_manager);
+ /* to handle wrap around we add sa_manager->size */
+ best = sa_manager->size * 2;
+ /* go over all fence lists and try to find the sa closest to
+ * the current last
+ */
+ for (i = 0; i < DRM_SUBALLOC_MAX_QUEUES; ++i) {
+ struct drm_suballoc *sa;
+
+ fences[i] = NULL;
+
+ if (list_empty(&sa_manager->flist[i]))
+ continue;
+
+ sa = list_first_entry(&sa_manager->flist[i],
+ struct drm_suballoc, flist);
+
+ if (!dma_fence_is_signaled(sa->fence)) {
+ fences[i] = sa->fence;
+ continue;
+ }
+
+ /* limit the number of tries each freelist gets */
+ if (tries[i] > 2)
+ continue;
+
+ tmp = sa->soffset;
+ if (tmp < soffset) {
+ /* wrap around, pretend it's after */
+ tmp += sa_manager->size;
+ }
+ tmp -= soffset;
+ if (tmp < best) {
+ /* this sa bo is the closest one */
+ best = tmp;
+ best_idx = i;
+ best_bo = sa;
+ }
+ }
+
+ if (best_bo) {
+ ++tries[best_idx];
+ sa_manager->hole = best_bo->olist.prev;
+
+ /*
+ * We know that this one is signaled,
+ * so it's safe to remove it.
+ */
+ drm_suballoc_remove_locked(best_bo);
+ return true;
+ }
+ return false;
+}
+
+/**
+ * drm_suballoc_new() - Make a suballocation.
+ * @sa_manager: pointer to the sa_manager
+ * @size: number of bytes we want to suballocate.
+ * @gfp: gfp flags used for memory allocation. Typically GFP_KERNEL but
+ * the argument is provided for suballocations from reclaim context or
+ * where the caller wants to avoid pipelining rather than wait for
+ * reclaim.
+ * @intr: Whether to perform waits interruptible. This should typically
+ * always be true, unless the caller needs to propagate a
+ * non-interruptible context from above layers.
+ * @align: Alignment. Must not exceed the default manager alignment.
+ * If @align is zero, then the manager alignment is used.
+ *
+ * Try to make a suballocation of size @size, which will be rounded
+ * up to the alignment specified in drm_suballoc_manager_init().
+ *
+ * Return: a new suballocated bo, or an ERR_PTR.
+ */
+struct drm_suballoc *
+drm_suballoc_new(struct drm_suballoc_manager *sa_manager, u64 size,
+ gfp_t gfp, bool intr, u64 align)
+{
+ struct dma_fence *fences[DRM_SUBALLOC_MAX_QUEUES];
+ unsigned int tries[DRM_SUBALLOC_MAX_QUEUES];
+ unsigned int count;
+ int i, r;
+ struct drm_suballoc *sa;
+
+ if (WARN_ON_ONCE(align > sa_manager->align))
+ return ERR_PTR(-EINVAL);
+ if (WARN_ON_ONCE(size > sa_manager->size || !size))
+ return ERR_PTR(-EINVAL);
+
+ if (!align)
+ align = sa_manager->align;
+
+ sa = kmalloc(sizeof(*sa), gfp);
+ if (!sa)
+ return ERR_PTR(-ENOMEM);
+ sa->manager = sa_manager;
+ sa->fence = NULL;
+ INIT_LIST_HEAD(&sa->olist);
+ INIT_LIST_HEAD(&sa->flist);
+
+ spin_lock(&sa_manager->wq.lock);
+ do {
+ for (i = 0; i < DRM_SUBALLOC_MAX_QUEUES; ++i)
+ tries[i] = 0;
+
+ do {
+ drm_suballoc_try_free(sa_manager);
+
+ if (drm_suballoc_try_alloc(sa_manager, sa,
+ size, align)) {
+ spin_unlock(&sa_manager->wq.lock);
+ return sa;
+ }
+
+ /* see if we can skip over some allocations */
+ } while (drm_suballoc_next_hole(sa_manager, fences, tries));
+
+ for (i = 0, count = 0; i < DRM_SUBALLOC_MAX_QUEUES; ++i)
+ if (fences[i])
+ fences[count++] = dma_fence_get(fences[i]);
+
+ if (count) {
+ long t;
+
+ spin_unlock(&sa_manager->wq.lock);
+ t = dma_fence_wait_any_timeout(fences, count, intr,
+ MAX_SCHEDULE_TIMEOUT,
+ NULL);
+ for (i = 0; i < count; ++i)
+ dma_fence_put(fences[i]);
+
+ r = (t > 0) ? 0 : t;
+ spin_lock(&sa_manager->wq.lock);
+ } else if (intr) {
+ /* if we have nothing to wait for, block */
+ r = wait_event_interruptible_locked
+ (sa_manager->wq,
+ __drm_suballoc_event(sa_manager, size, align));
+ } else {
+ spin_unlock(&sa_manager->wq.lock);
+ wait_event(sa_manager->wq,
+ drm_suballoc_event(sa_manager, size, align));
+ r = 0;
+ spin_lock(&sa_manager->wq.lock);
+ }
+ } while (!r);
+
+ spin_unlock(&sa_manager->wq.lock);
+ kfree(sa);
+ return ERR_PTR(r);
+}
+EXPORT_SYMBOL(drm_suballoc_new);
+
+/**
+ * drm_suballoc_free - Free a suballocation
+ * @suballoc: pointer to the suballocation
+ * @fence: fence that signals when suballocation is idle
+ *
+ * Free the suballocation. The suballocation can be re-used after @fence signals.
+ */
+void drm_suballoc_free(struct drm_suballoc *suballoc,
+ struct dma_fence *fence)
+{
+ struct drm_suballoc_manager *sa_manager;
+
+ if (!suballoc)
+ return;
+
+ sa_manager = suballoc->manager;
+
+ spin_lock(&sa_manager->wq.lock);
+ if (fence && !dma_fence_is_signaled(fence)) {
+ u64 idx;
+
+ suballoc->fence = dma_fence_get(fence);
+ idx = fence->context % DRM_SUBALLOC_MAX_QUEUES;
+ list_add_tail(&suballoc->flist, &sa_manager->flist[idx]);
+ } else {
+ drm_suballoc_remove_locked(suballoc);
+ }
+ wake_up_all_locked(&sa_manager->wq);
+ spin_unlock(&sa_manager->wq.lock);
+}
+EXPORT_SYMBOL(drm_suballoc_free);
+
+#ifdef CONFIG_DEBUG_FS
+void drm_suballoc_dump_debug_info(struct drm_suballoc_manager *sa_manager,
+ struct drm_printer *p, u64 suballoc_base)
+{
+ struct drm_suballoc *i;
+
+ spin_lock(&sa_manager->wq.lock);
+ list_for_each_entry(i, &sa_manager->olist, olist) {
+ u64 soffset = i->soffset;
+ u64 eoffset = i->eoffset;
+
+ if (&i->olist == sa_manager->hole)
+ drm_puts(p, ">");
+ else
+ drm_puts(p, " ");
+
+ drm_printf(p, "[0x%010llx 0x%010llx] size %8lld",
+ suballoc_base + soffset, suballoc_base + eoffset,
+ eoffset - soffset);
+
+ if (i->fence)
+ drm_printf(p, " protected by 0x%016llx on context %llu",
+ i->fence->seqno, i->fence->context);
+
+ drm_puts(p, "\n");
+ }
+ spin_unlock(&sa_manager->wq.lock);
+}
+EXPORT_SYMBOL(drm_suballoc_dump_debug_info);
+#endif
+MODULE_AUTHOR("Multiple");
+MODULE_DESCRIPTION("Range suballocator helper");
+MODULE_LICENSE("Dual MIT/GPL");
diff --git a/include/drm/drm_suballoc.h b/include/drm/drm_suballoc.h
new file mode 100644
index 000000000000..a737f996e5ff
--- /dev/null
+++ b/include/drm/drm_suballoc.h
@@ -0,0 +1,106 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright 2011 Red Hat Inc.
+ * Copyright © 2022 Intel Corporation
+ */
+#ifndef _DRM_SUBALLOC_H_
+#define _DRM_SUBALLOC_H_
+
+#include <drm/drm_mm.h>
+
+#include <linux/dma-fence.h>
+#include <linux/types.h>
+
+#define DRM_SUBALLOC_MAX_QUEUES 32
+/**
+ * struct drm_suballoc_manager - fenced range allocations
+ * @wq: Wait queue for sleeping allocations on contention.
+ * @hole: Pointer to first hole node.
+ * @olist: List of allocated ranges.
+ * @flist: Array[fence context hash] of queues of fenced allocated ranges.
+ * @size: Size of the managed range.
+ * @align: Default alignment for the managed range.
+ */
+struct drm_suballoc_manager {
+ wait_queue_head_t wq;
+ struct list_head *hole;
+ struct list_head olist;
+ struct list_head flist[DRM_SUBALLOC_MAX_QUEUES];
+ u64 size;
+ u64 align;
+};
+
+/**
+ * struct drm_suballoc - Sub-allocated range
+ * @olist: List link for list of allocated ranges.
+ * @flist: List link for the manager fenced allocated ranges queues.
+ * @manager: The drm_suballoc_manager.
+ * @soffset: Start offset.
+ * @eoffset: End offset + 1 so that @eoffset - @soffset = size.
+ * @fence: The fence protecting the allocation.
+ */
+struct drm_suballoc {
+ struct list_head olist;
+ struct list_head flist;
+ struct drm_suballoc_manager *manager;
+ u64 soffset;
+ u64 eoffset;
+ struct dma_fence *fence;
+};
+
+void drm_suballoc_manager_init(struct drm_suballoc_manager *sa_manager,
+ u64 size, u64 align);
+
+void drm_suballoc_manager_fini(struct drm_suballoc_manager *sa_manager);
+
+struct drm_suballoc *
+drm_suballoc_new(struct drm_suballoc_manager *sa_manager, u64 size, gfp_t gfp,
+ bool intr, u64 align);
+
+void drm_suballoc_free(struct drm_suballoc *sa, struct dma_fence *fence);
+
+/**
+ * drm_suballoc_soffset - Range start.
+ * @sa: The struct drm_suballoc.
+ *
+ * Return: The start of the allocated range.
+ */
+static inline u64 drm_suballoc_soffset(struct drm_suballoc *sa)
+{
+ return sa->soffset;
+}
+
+/**
+ * drm_suballoc_eoffset - Range end.
+ * @sa: The struct drm_suballoc.
+ *
+ * Return: The end of the allocated range + 1.
+ */
+static inline u64 drm_suballoc_eoffset(struct drm_suballoc *sa)
+{
+ return sa->eoffset;
+}
+
+/**
+ * drm_suballoc_size - Range size.
+ * @sa: The struct drm_suballoc.
+ *
+ * Return: The size of the allocated range.
+ */
+static inline u64 drm_suballoc_size(struct drm_suballoc *sa)
+{
+ return sa->eoffset - sa->soffset;
+}
+
+#ifdef CONFIG_DEBUG_FS
+void drm_suballoc_dump_debug_info(struct drm_suballoc_manager *sa_manager,
+ struct drm_printer *p, u64 suballoc_base);
+#else
+static inline void
+drm_suballoc_dump_debug_info(struct drm_suballoc_manager *sa_manager,
+ struct drm_printer *p, u64 suballoc_base)
+{ }
+
+#endif
+
+#endif /* _DRM_SUBALLOC_H_ */
--
2.34.1
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [Intel-xe] [PATCH 2/3] drm/amd: Convert amdgpu to use suballocation helper.
2023-02-23 10:57 [Intel-xe] [PATCH 0/3] drm/helpers: Make the suballocation manager drm generic Thomas Hellström
2023-02-23 10:57 ` [Intel-xe] [PATCH 1/3] drm/suballoc: Extract amdgpu_sa.c as generic suballocation helper Thomas Hellström
@ 2023-02-23 10:57 ` Thomas Hellström
2023-02-23 11:15 ` Christian König
2023-02-23 10:57 ` [Intel-xe] [PATCH 3/3] drm/radeon: Use the drm suballocation manager implementation Thomas Hellström
2 siblings, 1 reply; 12+ messages in thread
From: Thomas Hellström @ 2023-02-23 10:57 UTC (permalink / raw)
To: dri-devel
Cc: Daniel Vetter, Maarten Lankhorst, Christian Koenig, Dave Airlie,
intel-xe
From: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Now that we have a generic suballocation helper, use it in amdgpu.
For lines that get moved or changed, also fix up pre-existing style issues.
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Co-developed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
drivers/gpu/drm/Kconfig | 1 +
drivers/gpu/drm/amd/amdgpu/Kconfig | 1 +
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 26 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 5 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 23 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 3 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c | 324 ++-------------------
7 files changed, 46 insertions(+), 337 deletions(-)
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 8fbe57407c60..73ddfdf3a894 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -77,6 +77,7 @@ config DRM_KUNIT_TEST
select DRM_DISPLAY_HELPER
select DRM_LIB_RANDOM
select DRM_KMS_HELPER
+ select DRM_SUBALLOC_HELPER
select DRM_BUDDY
select DRM_EXPORT_FOR_TESTS if m
select DRM_KUNIT_TEST_HELPERS
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig
index 5341b6b242c3..0ed12171450b 100644
--- a/drivers/gpu/drm/amd/amdgpu/Kconfig
+++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
@@ -18,6 +18,7 @@ config DRM_AMDGPU
select BACKLIGHT_CLASS_DEVICE
select INTERVAL_TREE
select DRM_BUDDY
+ select DRM_SUBALLOC_HELPER
# amdgpu depends on ACPI_VIDEO when ACPI is enabled, for select to work
# ACPI_VIDEO's dependencies must also be selected.
select INPUT if ACPI
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 164141bc8b4a..dda88090f044 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -424,29 +424,11 @@ struct amdgpu_clock {
* alignment).
*/
-#define AMDGPU_SA_NUM_FENCE_LISTS 32
-
struct amdgpu_sa_manager {
- wait_queue_head_t wq;
- struct amdgpu_bo *bo;
- struct list_head *hole;
- struct list_head flist[AMDGPU_SA_NUM_FENCE_LISTS];
- struct list_head olist;
- unsigned size;
- uint64_t gpu_addr;
- void *cpu_ptr;
- uint32_t domain;
- uint32_t align;
-};
-
-/* sub-allocation buffer */
-struct amdgpu_sa_bo {
- struct list_head olist;
- struct list_head flist;
- struct amdgpu_sa_manager *manager;
- unsigned soffset;
- unsigned eoffset;
- struct dma_fence *fence;
+ struct drm_suballoc_manager base;
+ struct amdgpu_bo *bo;
+ uint64_t gpu_addr;
+ void *cpu_ptr;
};
int amdgpu_fence_slab_init(void);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index bcccc348dbe2..df7eb0b7c4b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -69,7 +69,7 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
if (size) {
r = amdgpu_sa_bo_new(&adev->ib_pools[pool_type],
- &ib->sa_bo, size, 256);
+ &ib->sa_bo, size);
if (r) {
dev_err(adev->dev, "failed to get a new IB (%d)\n", r);
return r;
@@ -309,8 +309,7 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev)
for (i = 0; i < AMDGPU_IB_POOL_MAX; i++) {
r = amdgpu_sa_bo_manager_init(adev, &adev->ib_pools[i],
- AMDGPU_IB_POOL_SIZE,
- AMDGPU_GPU_PAGE_SIZE,
+ AMDGPU_IB_POOL_SIZE, 256,
AMDGPU_GEM_DOMAIN_GTT);
if (r)
goto error;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 93207badf83f..5a85726ce853 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -336,15 +336,22 @@ uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev,
/*
* sub allocation
*/
+static inline struct amdgpu_sa_manager *
+to_amdgpu_sa_manager(struct drm_suballoc_manager *manager)
+{
+ return container_of(manager, struct amdgpu_sa_manager, base);
+}
-static inline uint64_t amdgpu_sa_bo_gpu_addr(struct amdgpu_sa_bo *sa_bo)
+static inline uint64_t amdgpu_sa_bo_gpu_addr(struct drm_suballoc *sa_bo)
{
- return sa_bo->manager->gpu_addr + sa_bo->soffset;
+ return to_amdgpu_sa_manager(sa_bo->manager)->gpu_addr +
+ drm_suballoc_soffset(sa_bo);
}
-static inline void * amdgpu_sa_bo_cpu_addr(struct amdgpu_sa_bo *sa_bo)
+static inline void *amdgpu_sa_bo_cpu_addr(struct drm_suballoc *sa_bo)
{
- return sa_bo->manager->cpu_ptr + sa_bo->soffset;
+ return to_amdgpu_sa_manager(sa_bo->manager)->cpu_ptr +
+ drm_suballoc_soffset(sa_bo);
}
int amdgpu_sa_bo_manager_init(struct amdgpu_device *adev,
@@ -355,11 +362,11 @@ void amdgpu_sa_bo_manager_fini(struct amdgpu_device *adev,
int amdgpu_sa_bo_manager_start(struct amdgpu_device *adev,
struct amdgpu_sa_manager *sa_manager);
int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
- struct amdgpu_sa_bo **sa_bo,
- unsigned size, unsigned align);
+ struct drm_suballoc **sa_bo,
+ unsigned int size);
void amdgpu_sa_bo_free(struct amdgpu_device *adev,
- struct amdgpu_sa_bo **sa_bo,
- struct dma_fence *fence);
+ struct drm_suballoc **sa_bo,
+ struct dma_fence *fence);
#if defined(CONFIG_DEBUG_FS)
void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager,
struct seq_file *m);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 3989e755a5b4..018f36b10de8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -27,6 +27,7 @@
#include <drm/amdgpu_drm.h>
#include <drm/gpu_scheduler.h>
#include <drm/drm_print.h>
+#include <drm/drm_suballoc.h>
struct amdgpu_device;
struct amdgpu_ring;
@@ -92,7 +93,7 @@ enum amdgpu_ib_pool_type {
};
struct amdgpu_ib {
- struct amdgpu_sa_bo *sa_bo;
+ struct drm_suballoc *sa_bo;
uint32_t length_dw;
uint64_t gpu_addr;
uint32_t *ptr;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
index 524d10b21041..c6b4337eb20c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
@@ -44,327 +44,63 @@
#include "amdgpu.h"
-static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo);
-static void amdgpu_sa_bo_try_free(struct amdgpu_sa_manager *sa_manager);
-
int amdgpu_sa_bo_manager_init(struct amdgpu_device *adev,
struct amdgpu_sa_manager *sa_manager,
- unsigned size, u32 align, u32 domain)
+ unsigned int size, u32 suballoc_align, u32 domain)
{
- int i, r;
-
- init_waitqueue_head(&sa_manager->wq);
- sa_manager->bo = NULL;
- sa_manager->size = size;
- sa_manager->domain = domain;
- sa_manager->align = align;
- sa_manager->hole = &sa_manager->olist;
- INIT_LIST_HEAD(&sa_manager->olist);
- for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
- INIT_LIST_HEAD(&sa_manager->flist[i]);
+ int r;
- r = amdgpu_bo_create_kernel(adev, size, align, domain, &sa_manager->bo,
- &sa_manager->gpu_addr, &sa_manager->cpu_ptr);
+ r = amdgpu_bo_create_kernel(adev, size, AMDGPU_GPU_PAGE_SIZE, domain,
+ &sa_manager->bo, &sa_manager->gpu_addr,
+ &sa_manager->cpu_ptr);
if (r) {
dev_err(adev->dev, "(%d) failed to allocate bo for manager\n", r);
return r;
}
- memset(sa_manager->cpu_ptr, 0, sa_manager->size);
+ memset(sa_manager->cpu_ptr, 0, size);
+ drm_suballoc_manager_init(&sa_manager->base, size, suballoc_align);
return r;
}
void amdgpu_sa_bo_manager_fini(struct amdgpu_device *adev,
struct amdgpu_sa_manager *sa_manager)
{
- struct amdgpu_sa_bo *sa_bo, *tmp;
-
if (sa_manager->bo == NULL) {
dev_err(adev->dev, "no bo for sa manager\n");
return;
}
- if (!list_empty(&sa_manager->olist)) {
- sa_manager->hole = &sa_manager->olist,
- amdgpu_sa_bo_try_free(sa_manager);
- if (!list_empty(&sa_manager->olist)) {
- dev_err(adev->dev, "sa_manager is not empty, clearing anyway\n");
- }
- }
- list_for_each_entry_safe(sa_bo, tmp, &sa_manager->olist, olist) {
- amdgpu_sa_bo_remove_locked(sa_bo);
- }
+ drm_suballoc_manager_fini(&sa_manager->base);
amdgpu_bo_free_kernel(&sa_manager->bo, &sa_manager->gpu_addr, &sa_manager->cpu_ptr);
- sa_manager->size = 0;
}
-static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo)
-{
- struct amdgpu_sa_manager *sa_manager = sa_bo->manager;
- if (sa_manager->hole == &sa_bo->olist) {
- sa_manager->hole = sa_bo->olist.prev;
- }
- list_del_init(&sa_bo->olist);
- list_del_init(&sa_bo->flist);
- dma_fence_put(sa_bo->fence);
- kfree(sa_bo);
-}
-
-static void amdgpu_sa_bo_try_free(struct amdgpu_sa_manager *sa_manager)
+int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
+ struct drm_suballoc **sa_bo,
+ unsigned int size)
{
- struct amdgpu_sa_bo *sa_bo, *tmp;
+ struct drm_suballoc *sa = drm_suballoc_new(&sa_manager->base, size,
+ GFP_KERNEL, true, 0);
- if (sa_manager->hole->next == &sa_manager->olist)
- return;
+ if (IS_ERR(sa)) {
+ *sa_bo = NULL;
- sa_bo = list_entry(sa_manager->hole->next, struct amdgpu_sa_bo, olist);
- list_for_each_entry_safe_from(sa_bo, tmp, &sa_manager->olist, olist) {
- if (sa_bo->fence == NULL ||
- !dma_fence_is_signaled(sa_bo->fence)) {
- return;
- }
- amdgpu_sa_bo_remove_locked(sa_bo);
+ return PTR_ERR(sa);
}
-}
-static inline unsigned amdgpu_sa_bo_hole_soffset(struct amdgpu_sa_manager *sa_manager)
-{
- struct list_head *hole = sa_manager->hole;
-
- if (hole != &sa_manager->olist) {
- return list_entry(hole, struct amdgpu_sa_bo, olist)->eoffset;
- }
+ *sa_bo = sa;
return 0;
}
-static inline unsigned amdgpu_sa_bo_hole_eoffset(struct amdgpu_sa_manager *sa_manager)
-{
- struct list_head *hole = sa_manager->hole;
-
- if (hole->next != &sa_manager->olist) {
- return list_entry(hole->next, struct amdgpu_sa_bo, olist)->soffset;
- }
- return sa_manager->size;
-}
-
-static bool amdgpu_sa_bo_try_alloc(struct amdgpu_sa_manager *sa_manager,
- struct amdgpu_sa_bo *sa_bo,
- unsigned size, unsigned align)
-{
- unsigned soffset, eoffset, wasted;
-
- soffset = amdgpu_sa_bo_hole_soffset(sa_manager);
- eoffset = amdgpu_sa_bo_hole_eoffset(sa_manager);
- wasted = (align - (soffset % align)) % align;
-
- if ((eoffset - soffset) >= (size + wasted)) {
- soffset += wasted;
-
- sa_bo->manager = sa_manager;
- sa_bo->soffset = soffset;
- sa_bo->eoffset = soffset + size;
- list_add(&sa_bo->olist, sa_manager->hole);
- INIT_LIST_HEAD(&sa_bo->flist);
- sa_manager->hole = &sa_bo->olist;
- return true;
- }
- return false;
-}
-
-/**
- * amdgpu_sa_event - Check if we can stop waiting
- *
- * @sa_manager: pointer to the sa_manager
- * @size: number of bytes we want to allocate
- * @align: alignment we need to match
- *
- * Check if either there is a fence we can wait for or
- * enough free memory to satisfy the allocation directly
- */
-static bool amdgpu_sa_event(struct amdgpu_sa_manager *sa_manager,
- unsigned size, unsigned align)
-{
- unsigned soffset, eoffset, wasted;
- int i;
-
- for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
- if (!list_empty(&sa_manager->flist[i]))
- return true;
-
- soffset = amdgpu_sa_bo_hole_soffset(sa_manager);
- eoffset = amdgpu_sa_bo_hole_eoffset(sa_manager);
- wasted = (align - (soffset % align)) % align;
-
- if ((eoffset - soffset) >= (size + wasted)) {
- return true;
- }
-
- return false;
-}
-
-static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager,
- struct dma_fence **fences,
- unsigned *tries)
-{
- struct amdgpu_sa_bo *best_bo = NULL;
- unsigned i, soffset, best, tmp;
-
- /* if hole points to the end of the buffer */
- if (sa_manager->hole->next == &sa_manager->olist) {
- /* try again with its beginning */
- sa_manager->hole = &sa_manager->olist;
- return true;
- }
-
- soffset = amdgpu_sa_bo_hole_soffset(sa_manager);
- /* to handle wrap around we add sa_manager->size */
- best = sa_manager->size * 2;
- /* go over all fence list and try to find the closest sa_bo
- * of the current last
- */
- for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i) {
- struct amdgpu_sa_bo *sa_bo;
-
- fences[i] = NULL;
-
- if (list_empty(&sa_manager->flist[i]))
- continue;
-
- sa_bo = list_first_entry(&sa_manager->flist[i],
- struct amdgpu_sa_bo, flist);
-
- if (!dma_fence_is_signaled(sa_bo->fence)) {
- fences[i] = sa_bo->fence;
- continue;
- }
-
- /* limit the number of tries each ring gets */
- if (tries[i] > 2) {
- continue;
- }
-
- tmp = sa_bo->soffset;
- if (tmp < soffset) {
- /* wrap around, pretend it's after */
- tmp += sa_manager->size;
- }
- tmp -= soffset;
- if (tmp < best) {
- /* this sa bo is the closest one */
- best = tmp;
- best_bo = sa_bo;
- }
- }
-
- if (best_bo) {
- uint32_t idx = best_bo->fence->context;
-
- idx %= AMDGPU_SA_NUM_FENCE_LISTS;
- ++tries[idx];
- sa_manager->hole = best_bo->olist.prev;
-
- /* we knew that this one is signaled,
- so it's save to remote it */
- amdgpu_sa_bo_remove_locked(best_bo);
- return true;
- }
- return false;
-}
-
-int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
- struct amdgpu_sa_bo **sa_bo,
- unsigned size, unsigned align)
-{
- struct dma_fence *fences[AMDGPU_SA_NUM_FENCE_LISTS];
- unsigned tries[AMDGPU_SA_NUM_FENCE_LISTS];
- unsigned count;
- int i, r;
- signed long t;
-
- if (WARN_ON_ONCE(align > sa_manager->align))
- return -EINVAL;
-
- if (WARN_ON_ONCE(size > sa_manager->size))
- return -EINVAL;
-
- *sa_bo = kmalloc(sizeof(struct amdgpu_sa_bo), GFP_KERNEL);
- if (!(*sa_bo))
- return -ENOMEM;
- (*sa_bo)->manager = sa_manager;
- (*sa_bo)->fence = NULL;
- INIT_LIST_HEAD(&(*sa_bo)->olist);
- INIT_LIST_HEAD(&(*sa_bo)->flist);
-
- spin_lock(&sa_manager->wq.lock);
- do {
- for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
- tries[i] = 0;
-
- do {
- amdgpu_sa_bo_try_free(sa_manager);
-
- if (amdgpu_sa_bo_try_alloc(sa_manager, *sa_bo,
- size, align)) {
- spin_unlock(&sa_manager->wq.lock);
- return 0;
- }
-
- /* see if we can skip over some allocations */
- } while (amdgpu_sa_bo_next_hole(sa_manager, fences, tries));
-
- for (i = 0, count = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
- if (fences[i])
- fences[count++] = dma_fence_get(fences[i]);
-
- if (count) {
- spin_unlock(&sa_manager->wq.lock);
- t = dma_fence_wait_any_timeout(fences, count, false,
- MAX_SCHEDULE_TIMEOUT,
- NULL);
- for (i = 0; i < count; ++i)
- dma_fence_put(fences[i]);
-
- r = (t > 0) ? 0 : t;
- spin_lock(&sa_manager->wq.lock);
- } else {
- /* if we have nothing to wait for block */
- r = wait_event_interruptible_locked(
- sa_manager->wq,
- amdgpu_sa_event(sa_manager, size, align)
- );
- }
-
- } while (!r);
-
- spin_unlock(&sa_manager->wq.lock);
- kfree(*sa_bo);
- *sa_bo = NULL;
- return r;
-}
-
-void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo,
+void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct drm_suballoc **sa_bo,
struct dma_fence *fence)
{
- struct amdgpu_sa_manager *sa_manager;
-
if (sa_bo == NULL || *sa_bo == NULL) {
return;
}
- sa_manager = (*sa_bo)->manager;
- spin_lock(&sa_manager->wq.lock);
- if (fence && !dma_fence_is_signaled(fence)) {
- uint32_t idx;
-
- (*sa_bo)->fence = dma_fence_get(fence);
- idx = fence->context % AMDGPU_SA_NUM_FENCE_LISTS;
- list_add_tail(&(*sa_bo)->flist, &sa_manager->flist[idx]);
- } else {
- amdgpu_sa_bo_remove_locked(*sa_bo);
- }
- wake_up_all_locked(&sa_manager->wq);
- spin_unlock(&sa_manager->wq.lock);
+ drm_suballoc_free(*sa_bo, fence);
*sa_bo = NULL;
}
@@ -373,26 +109,8 @@ void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo,
void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager,
struct seq_file *m)
{
- struct amdgpu_sa_bo *i;
-
- spin_lock(&sa_manager->wq.lock);
- list_for_each_entry(i, &sa_manager->olist, olist) {
- uint64_t soffset = i->soffset + sa_manager->gpu_addr;
- uint64_t eoffset = i->eoffset + sa_manager->gpu_addr;
- if (&i->olist == sa_manager->hole) {
- seq_printf(m, ">");
- } else {
- seq_printf(m, " ");
- }
- seq_printf(m, "[0x%010llx 0x%010llx] size %8lld",
- soffset, eoffset, eoffset - soffset);
+ struct drm_printer p = drm_seq_file_printer(m);
- if (i->fence)
- seq_printf(m, " protected by 0x%016llx on context %llu",
- i->fence->seqno, i->fence->context);
-
- seq_printf(m, "\n");
- }
- spin_unlock(&sa_manager->wq.lock);
+ drm_suballoc_dump_debug_info(&sa_manager->base, &p, sa_manager->gpu_addr);
}
#endif
--
2.34.1
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [Intel-xe] [PATCH 3/3] drm/radeon: Use the drm suballocation manager implementation.
2023-02-23 10:57 [Intel-xe] [PATCH 0/3] drm/helpers: Make the suballocation manager drm generic Thomas Hellström
2023-02-23 10:57 ` [Intel-xe] [PATCH 1/3] drm/suballoc: Extract amdgpu_sa.c as generic suballocation helper Thomas Hellström
2023-02-23 10:57 ` [Intel-xe] [PATCH 2/3] drm/amd: Convert amdgpu to use " Thomas Hellström
@ 2023-02-23 10:57 ` Thomas Hellström
2023-02-23 11:18 ` Christian König
2 siblings, 1 reply; 12+ messages in thread
From: Thomas Hellström @ 2023-02-23 10:57 UTC (permalink / raw)
To: dri-devel
Cc: Daniel Vetter, Maarten Lankhorst, Christian Koenig, Dave Airlie,
intel-xe
From: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Use the generic suballocation helper for radeon.
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Co-developed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
drivers/gpu/drm/radeon/radeon.h | 55 +---
drivers/gpu/drm/radeon/radeon_ib.c | 12 +-
drivers/gpu/drm/radeon/radeon_object.h | 25 +-
drivers/gpu/drm/radeon/radeon_sa.c | 316 ++--------------------
drivers/gpu/drm/radeon/radeon_semaphore.c | 4 +-
5 files changed, 56 insertions(+), 356 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 57e20780a458..d19a4b1c1a8f 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -79,6 +79,7 @@
#include <drm/drm_gem.h>
#include <drm/drm_audio_component.h>
+#include <drm/drm_suballoc.h>
#include "radeon_family.h"
#include "radeon_mode.h"
@@ -511,52 +512,12 @@ struct radeon_bo {
};
#define gem_to_radeon_bo(gobj) container_of((gobj), struct radeon_bo, tbo.base)
-/* sub-allocation manager, it has to be protected by another lock.
- * By conception this is an helper for other part of the driver
- * like the indirect buffer or semaphore, which both have their
- * locking.
- *
- * Principe is simple, we keep a list of sub allocation in offset
- * order (first entry has offset == 0, last entry has the highest
- * offset).
- *
- * When allocating new object we first check if there is room at
- * the end total_size - (last_object_offset + last_object_size) >=
- * alloc_size. If so we allocate new object there.
- *
- * When there is not enough room at the end, we start waiting for
- * each sub object until we reach object_offset+object_size >=
- * alloc_size, this object then become the sub object we return.
- *
- * Alignment can't be bigger than page size.
- *
- * Hole are not considered for allocation to keep things simple.
- * Assumption is that there won't be hole (all object on same
- * alignment).
- */
struct radeon_sa_manager {
- wait_queue_head_t wq;
- struct radeon_bo *bo;
- struct list_head *hole;
- struct list_head flist[RADEON_NUM_RINGS];
- struct list_head olist;
- unsigned size;
- uint64_t gpu_addr;
- void *cpu_ptr;
- uint32_t domain;
- uint32_t align;
-};
-
-struct radeon_sa_bo;
-
-/* sub-allocation buffer */
-struct radeon_sa_bo {
- struct list_head olist;
- struct list_head flist;
- struct radeon_sa_manager *manager;
- unsigned soffset;
- unsigned eoffset;
- struct radeon_fence *fence;
+ struct drm_suballoc_manager base;
+ struct radeon_bo *bo;
+ uint64_t gpu_addr;
+ void *cpu_ptr;
+ u32 domain;
};
/*
@@ -587,7 +548,7 @@ int radeon_mode_dumb_mmap(struct drm_file *filp,
* Semaphores.
*/
struct radeon_semaphore {
- struct radeon_sa_bo *sa_bo;
+ struct drm_suballoc *sa_bo;
signed waiters;
uint64_t gpu_addr;
};
@@ -816,7 +777,7 @@ void radeon_irq_kms_disable_hpd(struct radeon_device *rdev, unsigned hpd_mask);
*/
struct radeon_ib {
- struct radeon_sa_bo *sa_bo;
+ struct drm_suballoc *sa_bo;
uint32_t length_dw;
uint64_t gpu_addr;
uint32_t *ptr;
diff --git a/drivers/gpu/drm/radeon/radeon_ib.c b/drivers/gpu/drm/radeon/radeon_ib.c
index 62b116727b4f..6a45a72488f9 100644
--- a/drivers/gpu/drm/radeon/radeon_ib.c
+++ b/drivers/gpu/drm/radeon/radeon_ib.c
@@ -61,7 +61,7 @@ int radeon_ib_get(struct radeon_device *rdev, int ring,
{
int r;
- r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &ib->sa_bo, size, 256);
+ r = radeon_sa_bo_new(&rdev->ring_tmp_bo, &ib->sa_bo, size, 256);
if (r) {
dev_err(rdev->dev, "failed to get a new IB (%d)\n", r);
return r;
@@ -77,7 +77,7 @@ int radeon_ib_get(struct radeon_device *rdev, int ring,
/* ib pool is bound at RADEON_VA_IB_OFFSET in virtual address
* space and soffset is the offset inside the pool bo
*/
- ib->gpu_addr = ib->sa_bo->soffset + RADEON_VA_IB_OFFSET;
+ ib->gpu_addr = drm_suballoc_soffset(ib->sa_bo) + RADEON_VA_IB_OFFSET;
} else {
ib->gpu_addr = radeon_sa_bo_gpu_addr(ib->sa_bo);
}
@@ -97,7 +97,7 @@ int radeon_ib_get(struct radeon_device *rdev, int ring,
void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib)
{
radeon_sync_free(rdev, &ib->sync, ib->fence);
- radeon_sa_bo_free(rdev, &ib->sa_bo, ib->fence);
+ radeon_sa_bo_free(&ib->sa_bo, ib->fence);
radeon_fence_unref(&ib->fence);
}
@@ -201,8 +201,7 @@ int radeon_ib_pool_init(struct radeon_device *rdev)
if (rdev->family >= CHIP_BONAIRE) {
r = radeon_sa_bo_manager_init(rdev, &rdev->ring_tmp_bo,
- RADEON_IB_POOL_SIZE*64*1024,
- RADEON_GPU_PAGE_SIZE,
+ RADEON_IB_POOL_SIZE*64*1024, 256,
RADEON_GEM_DOMAIN_GTT,
RADEON_GEM_GTT_WC);
} else {
@@ -210,8 +209,7 @@ int radeon_ib_pool_init(struct radeon_device *rdev)
* to the command stream checking
*/
r = radeon_sa_bo_manager_init(rdev, &rdev->ring_tmp_bo,
- RADEON_IB_POOL_SIZE*64*1024,
- RADEON_GPU_PAGE_SIZE,
+ RADEON_IB_POOL_SIZE*64*1024, 256,
RADEON_GEM_DOMAIN_GTT, 0);
}
if (r) {
diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
index 0a6ef49e990a..39cc87a59a9a 100644
--- a/drivers/gpu/drm/radeon/radeon_object.h
+++ b/drivers/gpu/drm/radeon/radeon_object.h
@@ -169,15 +169,22 @@ extern void radeon_bo_fence(struct radeon_bo *bo, struct radeon_fence *fence,
/*
* sub allocation
*/
+static inline struct radeon_sa_manager *
+to_radeon_sa_manager(struct drm_suballoc_manager *manager)
+{
+ return container_of(manager, struct radeon_sa_manager, base);
+}
-static inline uint64_t radeon_sa_bo_gpu_addr(struct radeon_sa_bo *sa_bo)
+static inline uint64_t radeon_sa_bo_gpu_addr(struct drm_suballoc *sa_bo)
{
- return sa_bo->manager->gpu_addr + sa_bo->soffset;
+ return to_radeon_sa_manager(sa_bo->manager)->gpu_addr +
+ drm_suballoc_soffset(sa_bo);
}
-static inline void * radeon_sa_bo_cpu_addr(struct radeon_sa_bo *sa_bo)
+static inline void *radeon_sa_bo_cpu_addr(struct drm_suballoc *sa_bo)
{
- return sa_bo->manager->cpu_ptr + sa_bo->soffset;
+ return to_radeon_sa_manager(sa_bo->manager)->cpu_ptr +
+ drm_suballoc_soffset(sa_bo);
}
extern int radeon_sa_bo_manager_init(struct radeon_device *rdev,
@@ -190,12 +197,10 @@ extern int radeon_sa_bo_manager_start(struct radeon_device *rdev,
struct radeon_sa_manager *sa_manager);
extern int radeon_sa_bo_manager_suspend(struct radeon_device *rdev,
struct radeon_sa_manager *sa_manager);
-extern int radeon_sa_bo_new(struct radeon_device *rdev,
- struct radeon_sa_manager *sa_manager,
- struct radeon_sa_bo **sa_bo,
- unsigned size, unsigned align);
-extern void radeon_sa_bo_free(struct radeon_device *rdev,
- struct radeon_sa_bo **sa_bo,
+extern int radeon_sa_bo_new(struct radeon_sa_manager *sa_manager,
+ struct drm_suballoc **sa_bo,
+ unsigned int size, unsigned int align);
+extern void radeon_sa_bo_free(struct drm_suballoc **sa_bo,
struct radeon_fence *fence);
#if defined(CONFIG_DEBUG_FS)
extern void radeon_sa_bo_dump_debug_info(struct radeon_sa_manager *sa_manager,
diff --git a/drivers/gpu/drm/radeon/radeon_sa.c b/drivers/gpu/drm/radeon/radeon_sa.c
index 0981948bd9ed..c87a57c9c592 100644
--- a/drivers/gpu/drm/radeon/radeon_sa.c
+++ b/drivers/gpu/drm/radeon/radeon_sa.c
@@ -44,53 +44,32 @@
#include "radeon.h"
-static void radeon_sa_bo_remove_locked(struct radeon_sa_bo *sa_bo);
-static void radeon_sa_bo_try_free(struct radeon_sa_manager *sa_manager);
-
int radeon_sa_bo_manager_init(struct radeon_device *rdev,
struct radeon_sa_manager *sa_manager,
- unsigned size, u32 align, u32 domain, u32 flags)
+ unsigned int size, u32 sa_align, u32 domain,
+ u32 flags)
{
- int i, r;
-
- init_waitqueue_head(&sa_manager->wq);
- sa_manager->bo = NULL;
- sa_manager->size = size;
- sa_manager->domain = domain;
- sa_manager->align = align;
- sa_manager->hole = &sa_manager->olist;
- INIT_LIST_HEAD(&sa_manager->olist);
- for (i = 0; i < RADEON_NUM_RINGS; ++i) {
- INIT_LIST_HEAD(&sa_manager->flist[i]);
- }
+ int r;
- r = radeon_bo_create(rdev, size, align, true,
+ r = radeon_bo_create(rdev, size, RADEON_GPU_PAGE_SIZE, true,
domain, flags, NULL, NULL, &sa_manager->bo);
if (r) {
dev_err(rdev->dev, "(%d) failed to allocate bo for manager\n", r);
return r;
}
+ sa_manager->domain = domain;
+
+ drm_suballoc_manager_init(&sa_manager->base, size, sa_align);
+
return r;
}
void radeon_sa_bo_manager_fini(struct radeon_device *rdev,
struct radeon_sa_manager *sa_manager)
{
- struct radeon_sa_bo *sa_bo, *tmp;
-
- if (!list_empty(&sa_manager->olist)) {
- sa_manager->hole = &sa_manager->olist,
- radeon_sa_bo_try_free(sa_manager);
- if (!list_empty(&sa_manager->olist)) {
- dev_err(rdev->dev, "sa_manager is not empty, clearing anyway\n");
- }
- }
- list_for_each_entry_safe(sa_bo, tmp, &sa_manager->olist, olist) {
- radeon_sa_bo_remove_locked(sa_bo);
- }
+ drm_suballoc_manager_fini(&sa_manager->base);
radeon_bo_unref(&sa_manager->bo);
- sa_manager->size = 0;
}
int radeon_sa_bo_manager_start(struct radeon_device *rdev,
@@ -139,260 +118,34 @@ int radeon_sa_bo_manager_suspend(struct radeon_device *rdev,
return r;
}
-static void radeon_sa_bo_remove_locked(struct radeon_sa_bo *sa_bo)
+int radeon_sa_bo_new(struct radeon_sa_manager *sa_manager,
+ struct drm_suballoc **sa_bo,
+ unsigned int size, unsigned int align)
{
- struct radeon_sa_manager *sa_manager = sa_bo->manager;
- if (sa_manager->hole == &sa_bo->olist) {
- sa_manager->hole = sa_bo->olist.prev;
- }
- list_del_init(&sa_bo->olist);
- list_del_init(&sa_bo->flist);
- radeon_fence_unref(&sa_bo->fence);
- kfree(sa_bo);
-}
-
-static void radeon_sa_bo_try_free(struct radeon_sa_manager *sa_manager)
-{
- struct radeon_sa_bo *sa_bo, *tmp;
-
- if (sa_manager->hole->next == &sa_manager->olist)
- return;
+ struct drm_suballoc *sa = drm_suballoc_new(&sa_manager->base, size,
+ GFP_KERNEL, true, align);
- sa_bo = list_entry(sa_manager->hole->next, struct radeon_sa_bo, olist);
- list_for_each_entry_safe_from(sa_bo, tmp, &sa_manager->olist, olist) {
- if (sa_bo->fence == NULL || !radeon_fence_signaled(sa_bo->fence)) {
- return;
- }
- radeon_sa_bo_remove_locked(sa_bo);
+ if (IS_ERR(sa)) {
+ *sa_bo = NULL;
+ return PTR_ERR(sa);
}
-}
-static inline unsigned radeon_sa_bo_hole_soffset(struct radeon_sa_manager *sa_manager)
-{
- struct list_head *hole = sa_manager->hole;
-
- if (hole != &sa_manager->olist) {
- return list_entry(hole, struct radeon_sa_bo, olist)->eoffset;
- }
+ *sa_bo = sa;
return 0;
}
-static inline unsigned radeon_sa_bo_hole_eoffset(struct radeon_sa_manager *sa_manager)
-{
- struct list_head *hole = sa_manager->hole;
-
- if (hole->next != &sa_manager->olist) {
- return list_entry(hole->next, struct radeon_sa_bo, olist)->soffset;
- }
- return sa_manager->size;
-}
-
-static bool radeon_sa_bo_try_alloc(struct radeon_sa_manager *sa_manager,
- struct radeon_sa_bo *sa_bo,
- unsigned size, unsigned align)
-{
- unsigned soffset, eoffset, wasted;
-
- soffset = radeon_sa_bo_hole_soffset(sa_manager);
- eoffset = radeon_sa_bo_hole_eoffset(sa_manager);
- wasted = (align - (soffset % align)) % align;
-
- if ((eoffset - soffset) >= (size + wasted)) {
- soffset += wasted;
-
- sa_bo->manager = sa_manager;
- sa_bo->soffset = soffset;
- sa_bo->eoffset = soffset + size;
- list_add(&sa_bo->olist, sa_manager->hole);
- INIT_LIST_HEAD(&sa_bo->flist);
- sa_manager->hole = &sa_bo->olist;
- return true;
- }
- return false;
-}
-
-/**
- * radeon_sa_event - Check if we can stop waiting
- *
- * @sa_manager: pointer to the sa_manager
- * @size: number of bytes we want to allocate
- * @align: alignment we need to match
- *
- * Check if either there is a fence we can wait for or
- * enough free memory to satisfy the allocation directly
- */
-static bool radeon_sa_event(struct radeon_sa_manager *sa_manager,
- unsigned size, unsigned align)
-{
- unsigned soffset, eoffset, wasted;
- int i;
-
- for (i = 0; i < RADEON_NUM_RINGS; ++i) {
- if (!list_empty(&sa_manager->flist[i])) {
- return true;
- }
- }
-
- soffset = radeon_sa_bo_hole_soffset(sa_manager);
- eoffset = radeon_sa_bo_hole_eoffset(sa_manager);
- wasted = (align - (soffset % align)) % align;
-
- if ((eoffset - soffset) >= (size + wasted)) {
- return true;
- }
-
- return false;
-}
-
-static bool radeon_sa_bo_next_hole(struct radeon_sa_manager *sa_manager,
- struct radeon_fence **fences,
- unsigned *tries)
-{
- struct radeon_sa_bo *best_bo = NULL;
- unsigned i, soffset, best, tmp;
-
- /* if hole points to the end of the buffer */
- if (sa_manager->hole->next == &sa_manager->olist) {
- /* try again with its beginning */
- sa_manager->hole = &sa_manager->olist;
- return true;
- }
-
- soffset = radeon_sa_bo_hole_soffset(sa_manager);
- /* to handle wrap around we add sa_manager->size */
- best = sa_manager->size * 2;
- /* go over all fence list and try to find the closest sa_bo
- * of the current last
- */
- for (i = 0; i < RADEON_NUM_RINGS; ++i) {
- struct radeon_sa_bo *sa_bo;
-
- fences[i] = NULL;
-
- if (list_empty(&sa_manager->flist[i])) {
- continue;
- }
-
- sa_bo = list_first_entry(&sa_manager->flist[i],
- struct radeon_sa_bo, flist);
-
- if (!radeon_fence_signaled(sa_bo->fence)) {
- fences[i] = sa_bo->fence;
- continue;
- }
-
- /* limit the number of tries each ring gets */
- if (tries[i] > 2) {
- continue;
- }
-
- tmp = sa_bo->soffset;
- if (tmp < soffset) {
- /* wrap around, pretend it's after */
- tmp += sa_manager->size;
- }
- tmp -= soffset;
- if (tmp < best) {
- /* this sa bo is the closest one */
- best = tmp;
- best_bo = sa_bo;
- }
- }
-
- if (best_bo) {
- ++tries[best_bo->fence->ring];
- sa_manager->hole = best_bo->olist.prev;
-
- /* we knew that this one is signaled,
- so it's save to remote it */
- radeon_sa_bo_remove_locked(best_bo);
- return true;
- }
- return false;
-}
-
-int radeon_sa_bo_new(struct radeon_device *rdev,
- struct radeon_sa_manager *sa_manager,
- struct radeon_sa_bo **sa_bo,
- unsigned size, unsigned align)
-{
- struct radeon_fence *fences[RADEON_NUM_RINGS];
- unsigned tries[RADEON_NUM_RINGS];
- int i, r;
-
- BUG_ON(align > sa_manager->align);
- BUG_ON(size > sa_manager->size);
-
- *sa_bo = kmalloc(sizeof(struct radeon_sa_bo), GFP_KERNEL);
- if ((*sa_bo) == NULL) {
- return -ENOMEM;
- }
- (*sa_bo)->manager = sa_manager;
- (*sa_bo)->fence = NULL;
- INIT_LIST_HEAD(&(*sa_bo)->olist);
- INIT_LIST_HEAD(&(*sa_bo)->flist);
-
- spin_lock(&sa_manager->wq.lock);
- do {
- for (i = 0; i < RADEON_NUM_RINGS; ++i)
- tries[i] = 0;
-
- do {
- radeon_sa_bo_try_free(sa_manager);
-
- if (radeon_sa_bo_try_alloc(sa_manager, *sa_bo,
- size, align)) {
- spin_unlock(&sa_manager->wq.lock);
- return 0;
- }
-
- /* see if we can skip over some allocations */
- } while (radeon_sa_bo_next_hole(sa_manager, fences, tries));
-
- for (i = 0; i < RADEON_NUM_RINGS; ++i)
- radeon_fence_ref(fences[i]);
-
- spin_unlock(&sa_manager->wq.lock);
- r = radeon_fence_wait_any(rdev, fences, false);
- for (i = 0; i < RADEON_NUM_RINGS; ++i)
- radeon_fence_unref(&fences[i]);
- spin_lock(&sa_manager->wq.lock);
- /* if we have nothing to wait for block */
- if (r == -ENOENT) {
- r = wait_event_interruptible_locked(
- sa_manager->wq,
- radeon_sa_event(sa_manager, size, align)
- );
- }
-
- } while (!r);
-
- spin_unlock(&sa_manager->wq.lock);
- kfree(*sa_bo);
- *sa_bo = NULL;
- return r;
-}
-
-void radeon_sa_bo_free(struct radeon_device *rdev, struct radeon_sa_bo **sa_bo,
+void radeon_sa_bo_free(struct drm_suballoc **sa_bo,
struct radeon_fence *fence)
{
- struct radeon_sa_manager *sa_manager;
-
if (sa_bo == NULL || *sa_bo == NULL) {
return;
}
- sa_manager = (*sa_bo)->manager;
- spin_lock(&sa_manager->wq.lock);
- if (fence && !radeon_fence_signaled(fence)) {
- (*sa_bo)->fence = radeon_fence_ref(fence);
- list_add_tail(&(*sa_bo)->flist,
- &sa_manager->flist[fence->ring]);
- } else {
- radeon_sa_bo_remove_locked(*sa_bo);
- }
- wake_up_all_locked(&sa_manager->wq);
- spin_unlock(&sa_manager->wq.lock);
+ if (fence)
+ drm_suballoc_free(*sa_bo, &fence->base);
+ else
+ drm_suballoc_free(*sa_bo, NULL);
+
*sa_bo = NULL;
}
@@ -400,25 +153,8 @@ void radeon_sa_bo_free(struct radeon_device *rdev, struct radeon_sa_bo **sa_bo,
void radeon_sa_bo_dump_debug_info(struct radeon_sa_manager *sa_manager,
struct seq_file *m)
{
- struct radeon_sa_bo *i;
+ struct drm_printer p = drm_seq_file_printer(m);
- spin_lock(&sa_manager->wq.lock);
- list_for_each_entry(i, &sa_manager->olist, olist) {
- uint64_t soffset = i->soffset + sa_manager->gpu_addr;
- uint64_t eoffset = i->eoffset + sa_manager->gpu_addr;
- if (&i->olist == sa_manager->hole) {
- seq_printf(m, ">");
- } else {
- seq_printf(m, " ");
- }
- seq_printf(m, "[0x%010llx 0x%010llx] size %8lld",
- soffset, eoffset, eoffset - soffset);
- if (i->fence) {
- seq_printf(m, " protected by 0x%016llx on ring %d",
- i->fence->seq, i->fence->ring);
- }
- seq_printf(m, "\n");
- }
- spin_unlock(&sa_manager->wq.lock);
+ drm_suballoc_dump_debug_info(&sa_manager->base, &p, sa_manager->gpu_addr);
}
#endif
diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c b/drivers/gpu/drm/radeon/radeon_semaphore.c
index 221e59476f64..1f0a9a4ff5ae 100644
--- a/drivers/gpu/drm/radeon/radeon_semaphore.c
+++ b/drivers/gpu/drm/radeon/radeon_semaphore.c
@@ -40,7 +40,7 @@ int radeon_semaphore_create(struct radeon_device *rdev,
if (*semaphore == NULL) {
return -ENOMEM;
}
- r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo,
+ r = radeon_sa_bo_new(&rdev->ring_tmp_bo,
&(*semaphore)->sa_bo, 8, 8);
if (r) {
kfree(*semaphore);
@@ -100,7 +100,7 @@ void radeon_semaphore_free(struct radeon_device *rdev,
dev_err(rdev->dev, "semaphore %p has more waiters than signalers,"
" hardware lockup imminent!\n", *semaphore);
}
- radeon_sa_bo_free(rdev, &(*semaphore)->sa_bo, fence);
+ radeon_sa_bo_free(&(*semaphore)->sa_bo, fence);
kfree(*semaphore);
*semaphore = NULL;
}
--
2.34.1
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [Intel-xe] [PATCH 1/3] drm/suballoc: Extract amdgpu_sa.c as generic suballocation helper
2023-02-23 10:57 ` [Intel-xe] [PATCH 1/3] drm/suballoc: Extract amdgpu_sa.c as generic suballocation helper Thomas Hellström
@ 2023-02-23 11:13 ` Christian König
2023-02-23 11:22 ` Thomas Hellström
0 siblings, 1 reply; 12+ messages in thread
From: Christian König @ 2023-02-23 11:13 UTC (permalink / raw)
To: Thomas Hellström, dri-devel
Cc: Daniel Vetter, Maarten Lankhorst, intel-xe, Dave Airlie
Am 23.02.23 um 11:57 schrieb Thomas Hellström:
> From: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
>
> Suballocating a buffer object is something that is not driver-specific
> and useful for many drivers.
>
> Use a slightly modified version of amdgpu_sa.c
>
> v2:
> - Style cleanups. (Thomas)
> - Added / Modified documentation (Thomas)
> - Use u64 for the sizes and offset. The code dates back to 2012 and
> using unsigned int will probably soon come back to bite us.
> We can consider size_t as well for better 32-bit efficiency. (Thomas)
> - Add and document gfp, intr and align arguments to drm_suballoc_new()
> (Thomas)
> - Use drm_printer for debug output (Thomas)
>
> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> Co-developed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> ---
> drivers/gpu/drm/Kconfig | 4 +
> drivers/gpu/drm/Makefile | 3 +
> drivers/gpu/drm/drm_suballoc.c | 457 +++++++++++++++++++++++++++++++++
> include/drm/drm_suballoc.h | 106 ++++++++
> 4 files changed, 570 insertions(+)
> create mode 100644 drivers/gpu/drm/drm_suballoc.c
> create mode 100644 include/drm/drm_suballoc.h
>
> diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
> index dc0f94f02a82..8fbe57407c60 100644
> --- a/drivers/gpu/drm/Kconfig
> +++ b/drivers/gpu/drm/Kconfig
> @@ -232,6 +232,10 @@ config DRM_GEM_SHMEM_HELPER
> help
> Choose this if you need the GEM shmem helper functions
>
> +config DRM_SUBALLOC_HELPER
> + tristate
> + depends on DRM
> +
> config DRM_SCHED
> tristate
> depends on DRM
> diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
> index ab4460fcd63f..1e04d135e866 100644
> --- a/drivers/gpu/drm/Makefile
> +++ b/drivers/gpu/drm/Makefile
> @@ -88,6 +88,9 @@ obj-$(CONFIG_DRM_GEM_DMA_HELPER) += drm_dma_helper.o
> drm_shmem_helper-y := drm_gem_shmem_helper.o
> obj-$(CONFIG_DRM_GEM_SHMEM_HELPER) += drm_shmem_helper.o
>
> +drm_suballoc_helper-y := drm_suballoc.o
> +obj-$(CONFIG_DRM_SUBALLOC_HELPER) += drm_suballoc_helper.o
> +
> drm_vram_helper-y := drm_gem_vram_helper.o
> obj-$(CONFIG_DRM_VRAM_HELPER) += drm_vram_helper.o
>
> diff --git a/drivers/gpu/drm/drm_suballoc.c b/drivers/gpu/drm/drm_suballoc.c
> new file mode 100644
> index 000000000000..057cd19c44ba
> --- /dev/null
> +++ b/drivers/gpu/drm/drm_suballoc.c
> @@ -0,0 +1,457 @@
> +// SPDX-License-Identifier: GPL-2.0 OR MIT
> +/*
> + * Copyright 2011 Red Hat Inc.
> + * Copyright 2023 Intel Corporation.
> + * All Rights Reserved.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the
> + * "Software"), to deal in the Software without restriction, including
> + * without limitation the rights to use, copy, modify, merge, publish,
> + * distribute, sub license, and/or sell copies of the Software, and to
> + * permit persons to whom the Software is furnished to do so, subject to
> + * the following conditions:
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
> + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
> + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
> + * USE OR OTHER DEALINGS IN THE SOFTWARE.
> + *
> + * The above copyright notice and this permission notice (including the
> + * next paragraph) shall be included in all copies or substantial portions
> + * of the Software.
> + *
> + */
> +/*
> + * Authors:
> + * Jerome Glisse <glisse@freedesktop.org>
> + */
We should probably update this or just leave it out.
Apart from that Reviewed-by: Christian König <christian.koenig@amd.com>.
Regards,
Christian.
> +/* Algorithm:
> + *
> + * We store the last allocated bo in "hole", we always try to allocate
> + * after the last allocated bo. Principle is that in a linear GPU ring
> + * progression what is after last is the oldest bo we allocated and thus
> + * the first one that should no longer be in use by the GPU.
> + *
> + * If that is not the case, we skip over the bo after last to the closest
> + * done bo, if one exists. If none exists and we are not asked to
> + * block, we report failure to allocate.
> + *
> + * If we are asked to block, we wait on the oldest fence of every
> + * ring. We just wait for any of those fences to complete.
> + */
> +
> +#include <drm/drm_suballoc.h>
> +#include <drm/drm_print.h>
> +#include <linux/slab.h>
> +#include <linux/sched.h>
> +#include <linux/wait.h>
> +#include <linux/dma-fence.h>
> +
> +static void drm_suballoc_remove_locked(struct drm_suballoc *sa);
> +static void drm_suballoc_try_free(struct drm_suballoc_manager *sa_manager);
> +
> +/**
> + * drm_suballoc_manager_init() - Initialise the drm_suballoc_manager
> + * @sa_manager: pointer to the sa_manager
> + * @size: number of bytes we want to suballocate
> + * @align: alignment for each suballocated chunk
> + *
> + * Prepares the suballocation manager for suballocations.
> + */
> +void drm_suballoc_manager_init(struct drm_suballoc_manager *sa_manager,
> + u64 size, u64 align)
> +{
> + unsigned int i;
> +
> + if (!align)
> + align = 1;
> +
> + /* alignment must be a power of 2 */
> + if (WARN_ON_ONCE(align & (align - 1)))
> + align = roundup_pow_of_two(align);
> +
> + init_waitqueue_head(&sa_manager->wq);
> + sa_manager->size = size;
> + sa_manager->align = align;
> + sa_manager->hole = &sa_manager->olist;
> + INIT_LIST_HEAD(&sa_manager->olist);
> + for (i = 0; i < DRM_SUBALLOC_MAX_QUEUES; ++i)
> + INIT_LIST_HEAD(&sa_manager->flist[i]);
> +}
> +EXPORT_SYMBOL(drm_suballoc_manager_init);
> +
> +/**
> + * drm_suballoc_manager_fini() - Destroy the drm_suballoc_manager
> + * @sa_manager: pointer to the sa_manager
> + *
> + * Cleans up the suballocation manager after use. All fences added
> + * with drm_suballoc_free() must be signaled, or we cannot clean up
> + * the entire manager.
> + */
> +void drm_suballoc_manager_fini(struct drm_suballoc_manager *sa_manager)
> +{
> + struct drm_suballoc *sa, *tmp;
> +
> + if (!sa_manager->size)
> + return;
> +
> + if (!list_empty(&sa_manager->olist)) {
> + sa_manager->hole = &sa_manager->olist;
> + drm_suballoc_try_free(sa_manager);
> + if (!list_empty(&sa_manager->olist))
> + DRM_ERROR("sa_manager is not empty, clearing anyway\n");
> + }
> + list_for_each_entry_safe(sa, tmp, &sa_manager->olist, olist) {
> + drm_suballoc_remove_locked(sa);
> + }
> +
> + sa_manager->size = 0;
> +}
> +EXPORT_SYMBOL(drm_suballoc_manager_fini);
> +
> +static void drm_suballoc_remove_locked(struct drm_suballoc *sa)
> +{
> + struct drm_suballoc_manager *sa_manager = sa->manager;
> +
> + if (sa_manager->hole == &sa->olist)
> + sa_manager->hole = sa->olist.prev;
> +
> + list_del_init(&sa->olist);
> + list_del_init(&sa->flist);
> + dma_fence_put(sa->fence);
> + kfree(sa);
> +}
> +
> +static void drm_suballoc_try_free(struct drm_suballoc_manager *sa_manager)
> +{
> + struct drm_suballoc *sa, *tmp;
> +
> + if (sa_manager->hole->next == &sa_manager->olist)
> + return;
> +
> + sa = list_entry(sa_manager->hole->next, struct drm_suballoc, olist);
> + list_for_each_entry_safe_from(sa, tmp, &sa_manager->olist, olist) {
> + if (!sa->fence || !dma_fence_is_signaled(sa->fence))
> + return;
> +
> + drm_suballoc_remove_locked(sa);
> + }
> +}
> +
> +static u64 drm_suballoc_hole_soffset(struct drm_suballoc_manager *sa_manager)
> +{
> + struct list_head *hole = sa_manager->hole;
> +
> + if (hole != &sa_manager->olist)
> + return list_entry(hole, struct drm_suballoc, olist)->eoffset;
> +
> + return 0;
> +}
> +
> +static u64 drm_suballoc_hole_eoffset(struct drm_suballoc_manager *sa_manager)
> +{
> + struct list_head *hole = sa_manager->hole;
> +
> + if (hole->next != &sa_manager->olist)
> + return list_entry(hole->next, struct drm_suballoc, olist)->soffset;
> + return sa_manager->size;
> +}
> +
> +static bool drm_suballoc_try_alloc(struct drm_suballoc_manager *sa_manager,
> + struct drm_suballoc *sa,
> + u64 size, u64 align)
> +{
> + u64 soffset, eoffset, wasted;
> +
> + soffset = drm_suballoc_hole_soffset(sa_manager);
> + eoffset = drm_suballoc_hole_eoffset(sa_manager);
> + wasted = (align - (soffset % align)) % align;
> +
> + if ((eoffset - soffset) >= (size + wasted)) {
> + soffset += wasted;
> +
> + sa->manager = sa_manager;
> + sa->soffset = soffset;
> + sa->eoffset = soffset + size;
> + list_add(&sa->olist, sa_manager->hole);
> + INIT_LIST_HEAD(&sa->flist);
> + sa_manager->hole = &sa->olist;
> + return true;
> + }
> + return false;
> +}
> +
> +static bool __drm_suballoc_event(struct drm_suballoc_manager *sa_manager,
> + u64 size, u64 align)
> +{
> + u64 soffset, eoffset, wasted;
> + unsigned int i;
> +
> + for (i = 0; i < DRM_SUBALLOC_MAX_QUEUES; ++i)
> + if (!list_empty(&sa_manager->flist[i]))
> + return true;
> +
> + soffset = drm_suballoc_hole_soffset(sa_manager);
> + eoffset = drm_suballoc_hole_eoffset(sa_manager);
> + wasted = (align - (soffset % align)) % align;
> +
> + return ((eoffset - soffset) >= (size + wasted));
> +}
> +
> +/**
> + * drm_suballoc_event() - Check if we can stop waiting
> + * @sa_manager: pointer to the sa_manager
> + * @size: number of bytes we want to allocate
> + * @align: alignment we need to match
> + *
> + * Return: true if either there is a fence we can wait for or
> + * enough free memory to satisfy the allocation directly.
> + * false otherwise.
> + */
> +static bool drm_suballoc_event(struct drm_suballoc_manager *sa_manager,
> + u64 size, u64 align)
> +{
> + bool ret;
> +
> + spin_lock(&sa_manager->wq.lock);
> + ret = __drm_suballoc_event(sa_manager, size, align);
> + spin_unlock(&sa_manager->wq.lock);
> + return ret;
> +}
> +
> +static bool drm_suballoc_next_hole(struct drm_suballoc_manager *sa_manager,
> + struct dma_fence **fences,
> + unsigned int *tries)
> +{
> + struct drm_suballoc *best_bo = NULL;
> + unsigned int i, best_idx;
> + u64 soffset, best, tmp;
> +
> + /* if hole points to the end of the buffer */
> + if (sa_manager->hole->next == &sa_manager->olist) {
> + /* try again with its beginning */
> + sa_manager->hole = &sa_manager->olist;
> + return true;
> + }
> +
> + soffset = drm_suballoc_hole_soffset(sa_manager);
> + /* to handle wrap around we add sa_manager->size */
> + best = sa_manager->size * 2;
> + /* go over all fence lists and try to find the sa closest
> + * to the current last one
> + */
> + for (i = 0; i < DRM_SUBALLOC_MAX_QUEUES; ++i) {
> + struct drm_suballoc *sa;
> +
> + fences[i] = NULL;
> +
> + if (list_empty(&sa_manager->flist[i]))
> + continue;
> +
> + sa = list_first_entry(&sa_manager->flist[i],
> + struct drm_suballoc, flist);
> +
> + if (!dma_fence_is_signaled(sa->fence)) {
> + fences[i] = sa->fence;
> + continue;
> + }
> +
> + /* limit the number of tries each freelist gets */
> + if (tries[i] > 2)
> + continue;
> +
> + tmp = sa->soffset;
> + if (tmp < soffset) {
> + /* wrap around, pretend it's after */
> + tmp += sa_manager->size;
> + }
> + tmp -= soffset;
> + if (tmp < best) {
> + /* this sa bo is the closest one */
> + best = tmp;
> + best_idx = i;
> + best_bo = sa;
> + }
> + }
> +
> + if (best_bo) {
> + ++tries[best_idx];
> + sa_manager->hole = best_bo->olist.prev;
> +
> + /*
> + * We know that this one is signaled,
> + * so it's safe to remove it.
> + */
> + drm_suballoc_remove_locked(best_bo);
> + return true;
> + }
> + return false;
> +}
> +
> +/**
> + * drm_suballoc_new() - Make a suballocation.
> + * @sa_manager: pointer to the sa_manager
> + * @size: number of bytes we want to suballocate.
> + * @gfp: gfp flags used for memory allocation. Typically GFP_KERNEL but
> + * the argument is provided for suballocations from reclaim context or
> + * where the caller wants to avoid pipelining rather than wait for
> + * reclaim.
> + * @intr: Whether to perform waits interruptible. This should typically
> + * always be true, unless the caller needs to propagate a
> + * non-interruptible context from above layers.
> + * @align: Alignment. Must not exceed the default manager alignment.
> + * If @align is zero, then the manager alignment is used.
> + *
> + * Try to make a suballocation of size @size, which will be rounded
> + * up to the alignment specified in drm_suballoc_manager_init().
> + *
> + * Return: a new suballocated bo, or an ERR_PTR.
> + */
> +struct drm_suballoc *
> +drm_suballoc_new(struct drm_suballoc_manager *sa_manager, u64 size,
> + gfp_t gfp, bool intr, u64 align)
> +{
> + struct dma_fence *fences[DRM_SUBALLOC_MAX_QUEUES];
> + unsigned int tries[DRM_SUBALLOC_MAX_QUEUES];
> + unsigned int count;
> + int i, r;
> + struct drm_suballoc *sa;
> +
> + if (WARN_ON_ONCE(align > sa_manager->align))
> + return ERR_PTR(-EINVAL);
> + if (WARN_ON_ONCE(size > sa_manager->size || !size))
> + return ERR_PTR(-EINVAL);
> +
> + if (!align)
> + align = sa_manager->align;
> +
> + sa = kmalloc(sizeof(*sa), gfp);
> + if (!sa)
> + return ERR_PTR(-ENOMEM);
> + sa->manager = sa_manager;
> + sa->fence = NULL;
> + INIT_LIST_HEAD(&sa->olist);
> + INIT_LIST_HEAD(&sa->flist);
> +
> + spin_lock(&sa_manager->wq.lock);
> + do {
> + for (i = 0; i < DRM_SUBALLOC_MAX_QUEUES; ++i)
> + tries[i] = 0;
> +
> + do {
> + drm_suballoc_try_free(sa_manager);
> +
> + if (drm_suballoc_try_alloc(sa_manager, sa,
> + size, align)) {
> + spin_unlock(&sa_manager->wq.lock);
> + return sa;
> + }
> +
> + /* see if we can skip over some allocations */
> + } while (drm_suballoc_next_hole(sa_manager, fences, tries));
> +
> + for (i = 0, count = 0; i < DRM_SUBALLOC_MAX_QUEUES; ++i)
> + if (fences[i])
> + fences[count++] = dma_fence_get(fences[i]);
> +
> + if (count) {
> + long t;
> +
> + spin_unlock(&sa_manager->wq.lock);
> + t = dma_fence_wait_any_timeout(fences, count, intr,
> + MAX_SCHEDULE_TIMEOUT,
> + NULL);
> + for (i = 0; i < count; ++i)
> + dma_fence_put(fences[i]);
> +
> + r = (t > 0) ? 0 : t;
> + spin_lock(&sa_manager->wq.lock);
> + } else if (intr) {
> + /* if we have nothing to wait for, block */
> + r = wait_event_interruptible_locked
> + (sa_manager->wq,
> + __drm_suballoc_event(sa_manager, size, align));
> + } else {
> + spin_unlock(&sa_manager->wq.lock);
> + wait_event(sa_manager->wq,
> + drm_suballoc_event(sa_manager, size, align));
> + r = 0;
> + spin_lock(&sa_manager->wq.lock);
> + }
> + } while (!r);
> +
> + spin_unlock(&sa_manager->wq.lock);
> + kfree(sa);
> + return ERR_PTR(r);
> +}
> +EXPORT_SYMBOL(drm_suballoc_new);
> +
> +/**
> + * drm_suballoc_free - Free a suballocation
> + * @suballoc: pointer to the suballocation
> + * @fence: fence that signals when suballocation is idle
> + *
> + * Free the suballocation. The suballocation can be re-used after @fence signals.
> + */
> +void drm_suballoc_free(struct drm_suballoc *suballoc,
> + struct dma_fence *fence)
> +{
> + struct drm_suballoc_manager *sa_manager;
> +
> + if (!suballoc)
> + return;
> +
> + sa_manager = suballoc->manager;
> +
> + spin_lock(&sa_manager->wq.lock);
> + if (fence && !dma_fence_is_signaled(fence)) {
> + u64 idx;
> +
> + suballoc->fence = dma_fence_get(fence);
> + idx = fence->context % DRM_SUBALLOC_MAX_QUEUES;
> + list_add_tail(&suballoc->flist, &sa_manager->flist[idx]);
> + } else {
> + drm_suballoc_remove_locked(suballoc);
> + }
> + wake_up_all_locked(&sa_manager->wq);
> + spin_unlock(&sa_manager->wq.lock);
> +}
> +EXPORT_SYMBOL(drm_suballoc_free);
> +
> +#ifdef CONFIG_DEBUG_FS
> +void drm_suballoc_dump_debug_info(struct drm_suballoc_manager *sa_manager,
> + struct drm_printer *p, u64 suballoc_base)
> +{
> + struct drm_suballoc *i;
> +
> + spin_lock(&sa_manager->wq.lock);
> + list_for_each_entry(i, &sa_manager->olist, olist) {
> + u64 soffset = i->soffset;
> + u64 eoffset = i->eoffset;
> +
> + if (&i->olist == sa_manager->hole)
> + drm_puts(p, ">");
> + else
> + drm_puts(p, " ");
> +
> + drm_printf(p, "[0x%010llx 0x%010llx] size %8lld",
> + suballoc_base + soffset, suballoc_base + eoffset,
> + eoffset - soffset);
> +
> + if (i->fence)
> + drm_printf(p, " protected by 0x%016llx on context %llu",
> + i->fence->seqno, i->fence->context);
> +
> + drm_puts(p, "\n");
> + }
> + spin_unlock(&sa_manager->wq.lock);
> +}
> +EXPORT_SYMBOL(drm_suballoc_dump_debug_info);
> +#endif
> +MODULE_AUTHOR("Multiple");
> +MODULE_DESCRIPTION("Range suballocator helper");
> +MODULE_LICENSE("Dual MIT/GPL");
> diff --git a/include/drm/drm_suballoc.h b/include/drm/drm_suballoc.h
> new file mode 100644
> index 000000000000..a737f996e5ff
> --- /dev/null
> +++ b/include/drm/drm_suballoc.h
> @@ -0,0 +1,106 @@
> +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> +/*
> + * Copyright 2011 Red Hat Inc.
> + * Copyright © 2022 Intel Corporation
> + */
> +#ifndef _DRM_SUBALLOC_H_
> +#define _DRM_SUBALLOC_H_
> +
> +#include <drm/drm_mm.h>
> +
> +#include <linux/dma-fence.h>
> +#include <linux/types.h>
> +
> +#define DRM_SUBALLOC_MAX_QUEUES 32
> +/**
> + * struct drm_suballoc_manager - fenced range allocations
> + * @wq: Wait queue for sleeping allocations on contention.
> + * @hole: Pointer to first hole node.
> + * @olist: List of allocated ranges.
> + * @flist: Array[fence context hash] of queues of fenced allocated ranges.
> + * @size: Size of the managed range.
> + * @align: Default alignment for the managed range.
> + */
> +struct drm_suballoc_manager {
> + wait_queue_head_t wq;
> + struct list_head *hole;
> + struct list_head olist;
> + struct list_head flist[DRM_SUBALLOC_MAX_QUEUES];
> + u64 size;
> + u64 align;
> +};
> +
> +/**
> + * struct drm_suballoc - Sub-allocated range
> + * @olist: List link for list of allocated ranges.
> + * @flist: List link for the manager fenced allocated ranges queues.
> + * @manager: The drm_suballoc_manager.
> + * @soffset: Start offset.
> + * @eoffset: End offset + 1 so that @eoffset - @soffset = size.
> + * @fence: The fence protecting the allocation.
> + */
> +struct drm_suballoc {
> + struct list_head olist;
> + struct list_head flist;
> + struct drm_suballoc_manager *manager;
> + u64 soffset;
> + u64 eoffset;
> + struct dma_fence *fence;
> +};
> +
> +void drm_suballoc_manager_init(struct drm_suballoc_manager *sa_manager,
> + u64 size, u64 align);
> +
> +void drm_suballoc_manager_fini(struct drm_suballoc_manager *sa_manager);
> +
> +struct drm_suballoc *
> +drm_suballoc_new(struct drm_suballoc_manager *sa_manager, u64 size, gfp_t gfp,
> + bool intr, u64 align);
> +
> +void drm_suballoc_free(struct drm_suballoc *sa, struct dma_fence *fence);
> +
> +/**
> + * drm_suballoc_soffset - Range start.
> + * @sa: The struct drm_suballoc.
> + *
> + * Return: The start of the allocated range.
> + */
> +static inline u64 drm_suballoc_soffset(struct drm_suballoc *sa)
> +{
> + return sa->soffset;
> +}
> +
> +/**
> + * drm_suballoc_eoffset - Range end.
> + * @sa: The struct drm_suballoc.
> + *
> + * Return: The end of the allocated range + 1.
> + */
> +static inline u64 drm_suballoc_eoffset(struct drm_suballoc *sa)
> +{
> + return sa->eoffset;
> +}
> +
> +/**
> + * drm_suballoc_size - Range size.
> + * @sa: The struct drm_suballoc.
> + *
> + * Return: The size of the allocated range.
> + */
> +static inline u64 drm_suballoc_size(struct drm_suballoc *sa)
> +{
> + return sa->eoffset - sa->soffset;
> +}
> +
> +#ifdef CONFIG_DEBUG_FS
> +void drm_suballoc_dump_debug_info(struct drm_suballoc_manager *sa_manager,
> + struct drm_printer *p, u64 suballoc_base);
> +#else
> +static inline void
> +drm_suballoc_dump_debug_info(struct drm_suballoc_manager *sa_manager,
> + struct drm_printer *p, u64 suballoc_base)
> +{ }
> +
> +#endif
> +
> +#endif /* _DRM_SUBALLOC_H_ */
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [Intel-xe] [PATCH 2/3] drm/amd: Convert amdgpu to use suballocation helper.
2023-02-23 10:57 ` [Intel-xe] [PATCH 2/3] drm/amd: Convert amdgpu to use " Thomas Hellström
@ 2023-02-23 11:15 ` Christian König
2023-02-23 14:29 ` Thomas Hellström
0 siblings, 1 reply; 12+ messages in thread
From: Christian König @ 2023-02-23 11:15 UTC (permalink / raw)
To: Thomas Hellström, dri-devel
Cc: Daniel Vetter, Maarten Lankhorst, intel-xe, Dave Airlie
Am 23.02.23 um 11:57 schrieb Thomas Hellström:
> From: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
>
> Now that we have a generic suballocation helper, use it in amdgpu.
> For lines that get moved or changed, also fix up pre-existing style issues.
>
> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> Co-developed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> ---
> drivers/gpu/drm/Kconfig | 1 +
> drivers/gpu/drm/amd/amdgpu/Kconfig | 1 +
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 26 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 5 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 23 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 3 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c | 324 ++-------------------
> 7 files changed, 46 insertions(+), 337 deletions(-)
>
> diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
> index 8fbe57407c60..73ddfdf3a894 100644
> --- a/drivers/gpu/drm/Kconfig
> +++ b/drivers/gpu/drm/Kconfig
> @@ -77,6 +77,7 @@ config DRM_KUNIT_TEST
> select DRM_DISPLAY_HELPER
> select DRM_LIB_RANDOM
> select DRM_KMS_HELPER
> + select DRM_SUBALLOC_HELPER
> select DRM_BUDDY
> select DRM_EXPORT_FOR_TESTS if m
> select DRM_KUNIT_TEST_HELPERS
This looks like it's misplaced; apart from that, the patch looks good to me.
Regards,
Christian.
> diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig
> index 5341b6b242c3..0ed12171450b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/Kconfig
> +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
> @@ -18,6 +18,7 @@ config DRM_AMDGPU
> select BACKLIGHT_CLASS_DEVICE
> select INTERVAL_TREE
> select DRM_BUDDY
> + select DRM_SUBALLOC_HELPER
> # amdgpu depends on ACPI_VIDEO when ACPI is enabled, for select to work
> # ACPI_VIDEO's dependencies must also be selected.
> select INPUT if ACPI
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 164141bc8b4a..dda88090f044 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -424,29 +424,11 @@ struct amdgpu_clock {
> * alignment).
> */
>
> -#define AMDGPU_SA_NUM_FENCE_LISTS 32
> -
> struct amdgpu_sa_manager {
> - wait_queue_head_t wq;
> - struct amdgpu_bo *bo;
> - struct list_head *hole;
> - struct list_head flist[AMDGPU_SA_NUM_FENCE_LISTS];
> - struct list_head olist;
> - unsigned size;
> - uint64_t gpu_addr;
> - void *cpu_ptr;
> - uint32_t domain;
> - uint32_t align;
> -};
> -
> -/* sub-allocation buffer */
> -struct amdgpu_sa_bo {
> - struct list_head olist;
> - struct list_head flist;
> - struct amdgpu_sa_manager *manager;
> - unsigned soffset;
> - unsigned eoffset;
> - struct dma_fence *fence;
> + struct drm_suballoc_manager base;
> + struct amdgpu_bo *bo;
> + uint64_t gpu_addr;
> + void *cpu_ptr;
> };
>
> int amdgpu_fence_slab_init(void);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index bcccc348dbe2..df7eb0b7c4b9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -69,7 +69,7 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>
> if (size) {
> r = amdgpu_sa_bo_new(&adev->ib_pools[pool_type],
> - &ib->sa_bo, size, 256);
> + &ib->sa_bo, size);
> if (r) {
> dev_err(adev->dev, "failed to get a new IB (%d)\n", r);
> return r;
> @@ -309,8 +309,7 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev)
>
> for (i = 0; i < AMDGPU_IB_POOL_MAX; i++) {
> r = amdgpu_sa_bo_manager_init(adev, &adev->ib_pools[i],
> - AMDGPU_IB_POOL_SIZE,
> - AMDGPU_GPU_PAGE_SIZE,
> + AMDGPU_IB_POOL_SIZE, 256,
> AMDGPU_GEM_DOMAIN_GTT);
> if (r)
> goto error;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> index 93207badf83f..5a85726ce853 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> @@ -336,15 +336,22 @@ uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev,
> /*
> * sub allocation
> */
> +static inline struct amdgpu_sa_manager *
> +to_amdgpu_sa_manager(struct drm_suballoc_manager *manager)
> +{
> + return container_of(manager, struct amdgpu_sa_manager, base);
> +}
>
> -static inline uint64_t amdgpu_sa_bo_gpu_addr(struct amdgpu_sa_bo *sa_bo)
> +static inline uint64_t amdgpu_sa_bo_gpu_addr(struct drm_suballoc *sa_bo)
> {
> - return sa_bo->manager->gpu_addr + sa_bo->soffset;
> + return to_amdgpu_sa_manager(sa_bo->manager)->gpu_addr +
> + drm_suballoc_soffset(sa_bo);
> }
>
> -static inline void * amdgpu_sa_bo_cpu_addr(struct amdgpu_sa_bo *sa_bo)
> +static inline void *amdgpu_sa_bo_cpu_addr(struct drm_suballoc *sa_bo)
> {
> - return sa_bo->manager->cpu_ptr + sa_bo->soffset;
> + return to_amdgpu_sa_manager(sa_bo->manager)->cpu_ptr +
> + drm_suballoc_soffset(sa_bo);
> }
>
> int amdgpu_sa_bo_manager_init(struct amdgpu_device *adev,
> @@ -355,11 +362,11 @@ void amdgpu_sa_bo_manager_fini(struct amdgpu_device *adev,
> int amdgpu_sa_bo_manager_start(struct amdgpu_device *adev,
> struct amdgpu_sa_manager *sa_manager);
> int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
> - struct amdgpu_sa_bo **sa_bo,
> - unsigned size, unsigned align);
> + struct drm_suballoc **sa_bo,
> + unsigned int size);
> void amdgpu_sa_bo_free(struct amdgpu_device *adev,
> - struct amdgpu_sa_bo **sa_bo,
> - struct dma_fence *fence);
> + struct drm_suballoc **sa_bo,
> + struct dma_fence *fence);
> #if defined(CONFIG_DEBUG_FS)
> void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager,
> struct seq_file *m);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index 3989e755a5b4..018f36b10de8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -27,6 +27,7 @@
> #include <drm/amdgpu_drm.h>
> #include <drm/gpu_scheduler.h>
> #include <drm/drm_print.h>
> +#include <drm/drm_suballoc.h>
>
> struct amdgpu_device;
> struct amdgpu_ring;
> @@ -92,7 +93,7 @@ enum amdgpu_ib_pool_type {
> };
>
> struct amdgpu_ib {
> - struct amdgpu_sa_bo *sa_bo;
> + struct drm_suballoc *sa_bo;
> uint32_t length_dw;
> uint64_t gpu_addr;
> uint32_t *ptr;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
> index 524d10b21041..c6b4337eb20c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
> @@ -44,327 +44,63 @@
>
> #include "amdgpu.h"
>
> -static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo);
> -static void amdgpu_sa_bo_try_free(struct amdgpu_sa_manager *sa_manager);
> -
> int amdgpu_sa_bo_manager_init(struct amdgpu_device *adev,
> struct amdgpu_sa_manager *sa_manager,
> - unsigned size, u32 align, u32 domain)
> + unsigned int size, u32 suballoc_align, u32 domain)
> {
> - int i, r;
> -
> - init_waitqueue_head(&sa_manager->wq);
> - sa_manager->bo = NULL;
> - sa_manager->size = size;
> - sa_manager->domain = domain;
> - sa_manager->align = align;
> - sa_manager->hole = &sa_manager->olist;
> - INIT_LIST_HEAD(&sa_manager->olist);
> - for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
> - INIT_LIST_HEAD(&sa_manager->flist[i]);
> + int r;
>
> - r = amdgpu_bo_create_kernel(adev, size, align, domain, &sa_manager->bo,
> - &sa_manager->gpu_addr, &sa_manager->cpu_ptr);
> + r = amdgpu_bo_create_kernel(adev, size, AMDGPU_GPU_PAGE_SIZE, domain,
> + &sa_manager->bo, &sa_manager->gpu_addr,
> + &sa_manager->cpu_ptr);
> if (r) {
> dev_err(adev->dev, "(%d) failed to allocate bo for manager\n", r);
> return r;
> }
>
> - memset(sa_manager->cpu_ptr, 0, sa_manager->size);
> + memset(sa_manager->cpu_ptr, 0, size);
> + drm_suballoc_manager_init(&sa_manager->base, size, suballoc_align);
> return r;
> }
>
> void amdgpu_sa_bo_manager_fini(struct amdgpu_device *adev,
> struct amdgpu_sa_manager *sa_manager)
> {
> - struct amdgpu_sa_bo *sa_bo, *tmp;
> -
> if (sa_manager->bo == NULL) {
> dev_err(adev->dev, "no bo for sa manager\n");
> return;
> }
>
> - if (!list_empty(&sa_manager->olist)) {
> - sa_manager->hole = &sa_manager->olist,
> - amdgpu_sa_bo_try_free(sa_manager);
> - if (!list_empty(&sa_manager->olist)) {
> - dev_err(adev->dev, "sa_manager is not empty, clearing anyway\n");
> - }
> - }
> - list_for_each_entry_safe(sa_bo, tmp, &sa_manager->olist, olist) {
> - amdgpu_sa_bo_remove_locked(sa_bo);
> - }
> + drm_suballoc_manager_fini(&sa_manager->base);
>
> amdgpu_bo_free_kernel(&sa_manager->bo, &sa_manager->gpu_addr, &sa_manager->cpu_ptr);
> - sa_manager->size = 0;
> }
>
> -static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo)
> -{
> - struct amdgpu_sa_manager *sa_manager = sa_bo->manager;
> - if (sa_manager->hole == &sa_bo->olist) {
> - sa_manager->hole = sa_bo->olist.prev;
> - }
> - list_del_init(&sa_bo->olist);
> - list_del_init(&sa_bo->flist);
> - dma_fence_put(sa_bo->fence);
> - kfree(sa_bo);
> -}
> -
> -static void amdgpu_sa_bo_try_free(struct amdgpu_sa_manager *sa_manager)
> +int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
> + struct drm_suballoc **sa_bo,
> + unsigned int size)
> {
> - struct amdgpu_sa_bo *sa_bo, *tmp;
> + struct drm_suballoc *sa = drm_suballoc_new(&sa_manager->base, size,
> + GFP_KERNEL, true, 0);
>
> - if (sa_manager->hole->next == &sa_manager->olist)
> - return;
> + if (IS_ERR(sa)) {
> + *sa_bo = NULL;
>
> - sa_bo = list_entry(sa_manager->hole->next, struct amdgpu_sa_bo, olist);
> - list_for_each_entry_safe_from(sa_bo, tmp, &sa_manager->olist, olist) {
> - if (sa_bo->fence == NULL ||
> - !dma_fence_is_signaled(sa_bo->fence)) {
> - return;
> - }
> - amdgpu_sa_bo_remove_locked(sa_bo);
> + return PTR_ERR(sa);
> }
> -}
>
> -static inline unsigned amdgpu_sa_bo_hole_soffset(struct amdgpu_sa_manager *sa_manager)
> -{
> - struct list_head *hole = sa_manager->hole;
> -
> - if (hole != &sa_manager->olist) {
> - return list_entry(hole, struct amdgpu_sa_bo, olist)->eoffset;
> - }
> + *sa_bo = sa;
> return 0;
> }
>
> -static inline unsigned amdgpu_sa_bo_hole_eoffset(struct amdgpu_sa_manager *sa_manager)
> -{
> - struct list_head *hole = sa_manager->hole;
> -
> - if (hole->next != &sa_manager->olist) {
> - return list_entry(hole->next, struct amdgpu_sa_bo, olist)->soffset;
> - }
> - return sa_manager->size;
> -}
> -
> -static bool amdgpu_sa_bo_try_alloc(struct amdgpu_sa_manager *sa_manager,
> - struct amdgpu_sa_bo *sa_bo,
> - unsigned size, unsigned align)
> -{
> - unsigned soffset, eoffset, wasted;
> -
> - soffset = amdgpu_sa_bo_hole_soffset(sa_manager);
> - eoffset = amdgpu_sa_bo_hole_eoffset(sa_manager);
> - wasted = (align - (soffset % align)) % align;
> -
> - if ((eoffset - soffset) >= (size + wasted)) {
> - soffset += wasted;
> -
> - sa_bo->manager = sa_manager;
> - sa_bo->soffset = soffset;
> - sa_bo->eoffset = soffset + size;
> - list_add(&sa_bo->olist, sa_manager->hole);
> - INIT_LIST_HEAD(&sa_bo->flist);
> - sa_manager->hole = &sa_bo->olist;
> - return true;
> - }
> - return false;
> -}
> -
> -/**
> - * amdgpu_sa_event - Check if we can stop waiting
> - *
> - * @sa_manager: pointer to the sa_manager
> - * @size: number of bytes we want to allocate
> - * @align: alignment we need to match
> - *
> - * Check if either there is a fence we can wait for or
> - * enough free memory to satisfy the allocation directly
> - */
> -static bool amdgpu_sa_event(struct amdgpu_sa_manager *sa_manager,
> - unsigned size, unsigned align)
> -{
> - unsigned soffset, eoffset, wasted;
> - int i;
> -
> - for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
> - if (!list_empty(&sa_manager->flist[i]))
> - return true;
> -
> - soffset = amdgpu_sa_bo_hole_soffset(sa_manager);
> - eoffset = amdgpu_sa_bo_hole_eoffset(sa_manager);
> - wasted = (align - (soffset % align)) % align;
> -
> - if ((eoffset - soffset) >= (size + wasted)) {
> - return true;
> - }
> -
> - return false;
> -}
> -
> -static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager,
> - struct dma_fence **fences,
> - unsigned *tries)
> -{
> - struct amdgpu_sa_bo *best_bo = NULL;
> - unsigned i, soffset, best, tmp;
> -
> - /* if hole points to the end of the buffer */
> - if (sa_manager->hole->next == &sa_manager->olist) {
> - /* try again with its beginning */
> - sa_manager->hole = &sa_manager->olist;
> - return true;
> - }
> -
> - soffset = amdgpu_sa_bo_hole_soffset(sa_manager);
> - /* to handle wrap around we add sa_manager->size */
> - best = sa_manager->size * 2;
> - /* go over all fence list and try to find the closest sa_bo
> - * of the current last
> - */
> - for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i) {
> - struct amdgpu_sa_bo *sa_bo;
> -
> - fences[i] = NULL;
> -
> - if (list_empty(&sa_manager->flist[i]))
> - continue;
> -
> - sa_bo = list_first_entry(&sa_manager->flist[i],
> - struct amdgpu_sa_bo, flist);
> -
> - if (!dma_fence_is_signaled(sa_bo->fence)) {
> - fences[i] = sa_bo->fence;
> - continue;
> - }
> -
> - /* limit the number of tries each ring gets */
> - if (tries[i] > 2) {
> - continue;
> - }
> -
> - tmp = sa_bo->soffset;
> - if (tmp < soffset) {
> - /* wrap around, pretend it's after */
> - tmp += sa_manager->size;
> - }
> - tmp -= soffset;
> - if (tmp < best) {
> - /* this sa bo is the closest one */
> - best = tmp;
> - best_bo = sa_bo;
> - }
> - }
> -
> - if (best_bo) {
> - uint32_t idx = best_bo->fence->context;
> -
> - idx %= AMDGPU_SA_NUM_FENCE_LISTS;
> - ++tries[idx];
> - sa_manager->hole = best_bo->olist.prev;
> -
> - /* we knew that this one is signaled,
> - so it's save to remote it */
> - amdgpu_sa_bo_remove_locked(best_bo);
> - return true;
> - }
> - return false;
> -}
> -
> -int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
> - struct amdgpu_sa_bo **sa_bo,
> - unsigned size, unsigned align)
> -{
> - struct dma_fence *fences[AMDGPU_SA_NUM_FENCE_LISTS];
> - unsigned tries[AMDGPU_SA_NUM_FENCE_LISTS];
> - unsigned count;
> - int i, r;
> - signed long t;
> -
> - if (WARN_ON_ONCE(align > sa_manager->align))
> - return -EINVAL;
> -
> - if (WARN_ON_ONCE(size > sa_manager->size))
> - return -EINVAL;
> -
> - *sa_bo = kmalloc(sizeof(struct amdgpu_sa_bo), GFP_KERNEL);
> - if (!(*sa_bo))
> - return -ENOMEM;
> - (*sa_bo)->manager = sa_manager;
> - (*sa_bo)->fence = NULL;
> - INIT_LIST_HEAD(&(*sa_bo)->olist);
> - INIT_LIST_HEAD(&(*sa_bo)->flist);
> -
> - spin_lock(&sa_manager->wq.lock);
> - do {
> - for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
> - tries[i] = 0;
> -
> - do {
> - amdgpu_sa_bo_try_free(sa_manager);
> -
> - if (amdgpu_sa_bo_try_alloc(sa_manager, *sa_bo,
> - size, align)) {
> - spin_unlock(&sa_manager->wq.lock);
> - return 0;
> - }
> -
> - /* see if we can skip over some allocations */
> - } while (amdgpu_sa_bo_next_hole(sa_manager, fences, tries));
> -
> - for (i = 0, count = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
> - if (fences[i])
> - fences[count++] = dma_fence_get(fences[i]);
> -
> - if (count) {
> - spin_unlock(&sa_manager->wq.lock);
> - t = dma_fence_wait_any_timeout(fences, count, false,
> - MAX_SCHEDULE_TIMEOUT,
> - NULL);
> - for (i = 0; i < count; ++i)
> - dma_fence_put(fences[i]);
> -
> - r = (t > 0) ? 0 : t;
> - spin_lock(&sa_manager->wq.lock);
> - } else {
> - /* if we have nothing to wait for block */
> - r = wait_event_interruptible_locked(
> - sa_manager->wq,
> - amdgpu_sa_event(sa_manager, size, align)
> - );
> - }
> -
> - } while (!r);
> -
> - spin_unlock(&sa_manager->wq.lock);
> - kfree(*sa_bo);
> - *sa_bo = NULL;
> - return r;
> -}
> -
> -void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo,
> +void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct drm_suballoc **sa_bo,
> struct dma_fence *fence)
> {
> - struct amdgpu_sa_manager *sa_manager;
> -
> if (sa_bo == NULL || *sa_bo == NULL) {
> return;
> }
>
> - sa_manager = (*sa_bo)->manager;
> - spin_lock(&sa_manager->wq.lock);
> - if (fence && !dma_fence_is_signaled(fence)) {
> - uint32_t idx;
> -
> - (*sa_bo)->fence = dma_fence_get(fence);
> - idx = fence->context % AMDGPU_SA_NUM_FENCE_LISTS;
> - list_add_tail(&(*sa_bo)->flist, &sa_manager->flist[idx]);
> - } else {
> - amdgpu_sa_bo_remove_locked(*sa_bo);
> - }
> - wake_up_all_locked(&sa_manager->wq);
> - spin_unlock(&sa_manager->wq.lock);
> + drm_suballoc_free(*sa_bo, fence);
> *sa_bo = NULL;
> }
>
> @@ -373,26 +109,8 @@ void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo,
> void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager,
> struct seq_file *m)
> {
> - struct amdgpu_sa_bo *i;
> -
> - spin_lock(&sa_manager->wq.lock);
> - list_for_each_entry(i, &sa_manager->olist, olist) {
> - uint64_t soffset = i->soffset + sa_manager->gpu_addr;
> - uint64_t eoffset = i->eoffset + sa_manager->gpu_addr;
> - if (&i->olist == sa_manager->hole) {
> - seq_printf(m, ">");
> - } else {
> - seq_printf(m, " ");
> - }
> - seq_printf(m, "[0x%010llx 0x%010llx] size %8lld",
> - soffset, eoffset, eoffset - soffset);
> + struct drm_printer p = drm_seq_file_printer(m);
>
> - if (i->fence)
> - seq_printf(m, " protected by 0x%016llx on context %llu",
> - i->fence->seqno, i->fence->context);
> -
> - seq_printf(m, "\n");
> - }
> - spin_unlock(&sa_manager->wq.lock);
> + drm_suballoc_dump_debug_info(&sa_manager->base, &p, sa_manager->gpu_addr);
> }
> #endif
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [Intel-xe] [PATCH 3/3] drm/radeon: Use the drm suballocation manager implementation.
2023-02-23 10:57 ` [Intel-xe] [PATCH 3/3] drm/radeon: Use the drm suballocation manager implementation Thomas Hellström
@ 2023-02-23 11:18 ` Christian König
0 siblings, 0 replies; 12+ messages in thread
From: Christian König @ 2023-02-23 11:18 UTC (permalink / raw)
To: Thomas Hellström, dri-devel
Cc: Daniel Vetter, Maarten Lankhorst, intel-xe, Dave Airlie
Am 23.02.23 um 11:57 schrieb Thomas Hellström:
> From: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
>
> Use the generic suballocation helper for radeon.
>
> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> Co-developed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
> ---
> drivers/gpu/drm/radeon/radeon.h | 55 +---
> drivers/gpu/drm/radeon/radeon_ib.c | 12 +-
> drivers/gpu/drm/radeon/radeon_object.h | 25 +-
> drivers/gpu/drm/radeon/radeon_sa.c | 316 ++--------------------
> drivers/gpu/drm/radeon/radeon_semaphore.c | 4 +-
> 5 files changed, 56 insertions(+), 356 deletions(-)
>
> diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
> index 57e20780a458..d19a4b1c1a8f 100644
> --- a/drivers/gpu/drm/radeon/radeon.h
> +++ b/drivers/gpu/drm/radeon/radeon.h
> @@ -79,6 +79,7 @@
>
> #include <drm/drm_gem.h>
> #include <drm/drm_audio_component.h>
> +#include <drm/drm_suballoc.h>
>
> #include "radeon_family.h"
> #include "radeon_mode.h"
> @@ -511,52 +512,12 @@ struct radeon_bo {
> };
> #define gem_to_radeon_bo(gobj) container_of((gobj), struct radeon_bo, tbo.base)
>
> -/* sub-allocation manager, it has to be protected by another lock.
> - * By conception this is an helper for other part of the driver
> - * like the indirect buffer or semaphore, which both have their
> - * locking.
> - *
> - * Principe is simple, we keep a list of sub allocation in offset
> - * order (first entry has offset == 0, last entry has the highest
> - * offset).
> - *
> - * When allocating new object we first check if there is room at
> - * the end total_size - (last_object_offset + last_object_size) >=
> - * alloc_size. If so we allocate new object there.
> - *
> - * When there is not enough room at the end, we start waiting for
> - * each sub object until we reach object_offset+object_size >=
> - * alloc_size, this object then become the sub object we return.
> - *
> - * Alignment can't be bigger than page size.
> - *
> - * Hole are not considered for allocation to keep things simple.
> - * Assumption is that there won't be hole (all object on same
> - * alignment).
> - */
> struct radeon_sa_manager {
> - wait_queue_head_t wq;
> - struct radeon_bo *bo;
> - struct list_head *hole;
> - struct list_head flist[RADEON_NUM_RINGS];
> - struct list_head olist;
> - unsigned size;
> - uint64_t gpu_addr;
> - void *cpu_ptr;
> - uint32_t domain;
> - uint32_t align;
> -};
> -
> -struct radeon_sa_bo;
> -
> -/* sub-allocation buffer */
> -struct radeon_sa_bo {
> - struct list_head olist;
> - struct list_head flist;
> - struct radeon_sa_manager *manager;
> - unsigned soffset;
> - unsigned eoffset;
> - struct radeon_fence *fence;
> + struct drm_suballoc_manager base;
> + struct radeon_bo *bo;
> + uint64_t gpu_addr;
> + void *cpu_ptr;
> + u32 domain;
> };
>
> /*
> @@ -587,7 +548,7 @@ int radeon_mode_dumb_mmap(struct drm_file *filp,
> * Semaphores.
> */
> struct radeon_semaphore {
> - struct radeon_sa_bo *sa_bo;
> + struct drm_suballoc *sa_bo;
> signed waiters;
> uint64_t gpu_addr;
> };
> @@ -816,7 +777,7 @@ void radeon_irq_kms_disable_hpd(struct radeon_device *rdev, unsigned hpd_mask);
> */
>
> struct radeon_ib {
> - struct radeon_sa_bo *sa_bo;
> + struct drm_suballoc *sa_bo;
> uint32_t length_dw;
> uint64_t gpu_addr;
> uint32_t *ptr;
> diff --git a/drivers/gpu/drm/radeon/radeon_ib.c b/drivers/gpu/drm/radeon/radeon_ib.c
> index 62b116727b4f..6a45a72488f9 100644
> --- a/drivers/gpu/drm/radeon/radeon_ib.c
> +++ b/drivers/gpu/drm/radeon/radeon_ib.c
> @@ -61,7 +61,7 @@ int radeon_ib_get(struct radeon_device *rdev, int ring,
> {
> int r;
>
> - r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &ib->sa_bo, size, 256);
> + r = radeon_sa_bo_new(&rdev->ring_tmp_bo, &ib->sa_bo, size, 256);
> if (r) {
> dev_err(rdev->dev, "failed to get a new IB (%d)\n", r);
> return r;
> @@ -77,7 +77,7 @@ int radeon_ib_get(struct radeon_device *rdev, int ring,
> /* ib pool is bound at RADEON_VA_IB_OFFSET in virtual address
> * space and soffset is the offset inside the pool bo
> */
> - ib->gpu_addr = ib->sa_bo->soffset + RADEON_VA_IB_OFFSET;
> + ib->gpu_addr = drm_suballoc_soffset(ib->sa_bo) + RADEON_VA_IB_OFFSET;
> } else {
> ib->gpu_addr = radeon_sa_bo_gpu_addr(ib->sa_bo);
> }
> @@ -97,7 +97,7 @@ int radeon_ib_get(struct radeon_device *rdev, int ring,
> void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib)
> {
> radeon_sync_free(rdev, &ib->sync, ib->fence);
> - radeon_sa_bo_free(rdev, &ib->sa_bo, ib->fence);
> + radeon_sa_bo_free(&ib->sa_bo, ib->fence);
> radeon_fence_unref(&ib->fence);
> }
>
> @@ -201,8 +201,7 @@ int radeon_ib_pool_init(struct radeon_device *rdev)
>
> if (rdev->family >= CHIP_BONAIRE) {
> r = radeon_sa_bo_manager_init(rdev, &rdev->ring_tmp_bo,
> - RADEON_IB_POOL_SIZE*64*1024,
> - RADEON_GPU_PAGE_SIZE,
> + RADEON_IB_POOL_SIZE*64*1024, 256,
> RADEON_GEM_DOMAIN_GTT,
> RADEON_GEM_GTT_WC);
> } else {
> @@ -210,8 +209,7 @@ int radeon_ib_pool_init(struct radeon_device *rdev)
> * to the command stream checking
> */
> r = radeon_sa_bo_manager_init(rdev, &rdev->ring_tmp_bo,
> - RADEON_IB_POOL_SIZE*64*1024,
> - RADEON_GPU_PAGE_SIZE,
> + RADEON_IB_POOL_SIZE*64*1024, 256,
> RADEON_GEM_DOMAIN_GTT, 0);
> }
> if (r) {
> diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
> index 0a6ef49e990a..39cc87a59a9a 100644
> --- a/drivers/gpu/drm/radeon/radeon_object.h
> +++ b/drivers/gpu/drm/radeon/radeon_object.h
> @@ -169,15 +169,22 @@ extern void radeon_bo_fence(struct radeon_bo *bo, struct radeon_fence *fence,
> /*
> * sub allocation
> */
> +static inline struct radeon_sa_manager *
> +to_radeon_sa_manager(struct drm_suballoc_manager *manager)
> +{
> + return container_of(manager, struct radeon_sa_manager, base);
> +}
>
> -static inline uint64_t radeon_sa_bo_gpu_addr(struct radeon_sa_bo *sa_bo)
> +static inline uint64_t radeon_sa_bo_gpu_addr(struct drm_suballoc *sa_bo)
> {
> - return sa_bo->manager->gpu_addr + sa_bo->soffset;
> + return to_radeon_sa_manager(sa_bo->manager)->gpu_addr +
> + drm_suballoc_soffset(sa_bo);
> }
>
> -static inline void * radeon_sa_bo_cpu_addr(struct radeon_sa_bo *sa_bo)
> +static inline void *radeon_sa_bo_cpu_addr(struct drm_suballoc *sa_bo)
> {
> - return sa_bo->manager->cpu_ptr + sa_bo->soffset;
> + return to_radeon_sa_manager(sa_bo->manager)->cpu_ptr +
> + drm_suballoc_soffset(sa_bo);
> }
>
> extern int radeon_sa_bo_manager_init(struct radeon_device *rdev,
> @@ -190,12 +197,10 @@ extern int radeon_sa_bo_manager_start(struct radeon_device *rdev,
> struct radeon_sa_manager *sa_manager);
> extern int radeon_sa_bo_manager_suspend(struct radeon_device *rdev,
> struct radeon_sa_manager *sa_manager);
> -extern int radeon_sa_bo_new(struct radeon_device *rdev,
> - struct radeon_sa_manager *sa_manager,
> - struct radeon_sa_bo **sa_bo,
> - unsigned size, unsigned align);
> -extern void radeon_sa_bo_free(struct radeon_device *rdev,
> - struct radeon_sa_bo **sa_bo,
> +extern int radeon_sa_bo_new(struct radeon_sa_manager *sa_manager,
> + struct drm_suballoc **sa_bo,
> + unsigned int size, unsigned int align);
> +extern void radeon_sa_bo_free(struct drm_suballoc **sa_bo,
> struct radeon_fence *fence);
> #if defined(CONFIG_DEBUG_FS)
> extern void radeon_sa_bo_dump_debug_info(struct radeon_sa_manager *sa_manager,
> diff --git a/drivers/gpu/drm/radeon/radeon_sa.c b/drivers/gpu/drm/radeon/radeon_sa.c
> index 0981948bd9ed..c87a57c9c592 100644
> --- a/drivers/gpu/drm/radeon/radeon_sa.c
> +++ b/drivers/gpu/drm/radeon/radeon_sa.c
> @@ -44,53 +44,32 @@
>
> #include "radeon.h"
>
> -static void radeon_sa_bo_remove_locked(struct radeon_sa_bo *sa_bo);
> -static void radeon_sa_bo_try_free(struct radeon_sa_manager *sa_manager);
> -
> int radeon_sa_bo_manager_init(struct radeon_device *rdev,
> struct radeon_sa_manager *sa_manager,
> - unsigned size, u32 align, u32 domain, u32 flags)
> + unsigned int size, u32 sa_align, u32 domain,
> + u32 flags)
> {
> - int i, r;
> -
> - init_waitqueue_head(&sa_manager->wq);
> - sa_manager->bo = NULL;
> - sa_manager->size = size;
> - sa_manager->domain = domain;
> - sa_manager->align = align;
> - sa_manager->hole = &sa_manager->olist;
> - INIT_LIST_HEAD(&sa_manager->olist);
> - for (i = 0; i < RADEON_NUM_RINGS; ++i) {
> - INIT_LIST_HEAD(&sa_manager->flist[i]);
> - }
> + int r;
>
> - r = radeon_bo_create(rdev, size, align, true,
> + r = radeon_bo_create(rdev, size, RADEON_GPU_PAGE_SIZE, true,
> domain, flags, NULL, NULL, &sa_manager->bo);
> if (r) {
> dev_err(rdev->dev, "(%d) failed to allocate bo for manager\n", r);
> return r;
> }
>
> + sa_manager->domain = domain;
> +
> + drm_suballoc_manager_init(&sa_manager->base, size, sa_align);
> +
> return r;
> }
>
> void radeon_sa_bo_manager_fini(struct radeon_device *rdev,
> struct radeon_sa_manager *sa_manager)
> {
> - struct radeon_sa_bo *sa_bo, *tmp;
> -
> - if (!list_empty(&sa_manager->olist)) {
> - sa_manager->hole = &sa_manager->olist,
> - radeon_sa_bo_try_free(sa_manager);
> - if (!list_empty(&sa_manager->olist)) {
> - dev_err(rdev->dev, "sa_manager is not empty, clearing anyway\n");
> - }
> - }
> - list_for_each_entry_safe(sa_bo, tmp, &sa_manager->olist, olist) {
> - radeon_sa_bo_remove_locked(sa_bo);
> - }
> + drm_suballoc_manager_fini(&sa_manager->base);
> radeon_bo_unref(&sa_manager->bo);
> - sa_manager->size = 0;
> }
>
> int radeon_sa_bo_manager_start(struct radeon_device *rdev,
> @@ -139,260 +118,34 @@ int radeon_sa_bo_manager_suspend(struct radeon_device *rdev,
> return r;
> }
>
> -static void radeon_sa_bo_remove_locked(struct radeon_sa_bo *sa_bo)
> +int radeon_sa_bo_new(struct radeon_sa_manager *sa_manager,
> + struct drm_suballoc **sa_bo,
> + unsigned int size, unsigned int align)
> {
> - struct radeon_sa_manager *sa_manager = sa_bo->manager;
> - if (sa_manager->hole == &sa_bo->olist) {
> - sa_manager->hole = sa_bo->olist.prev;
> - }
> - list_del_init(&sa_bo->olist);
> - list_del_init(&sa_bo->flist);
> - radeon_fence_unref(&sa_bo->fence);
> - kfree(sa_bo);
> -}
> -
> -static void radeon_sa_bo_try_free(struct radeon_sa_manager *sa_manager)
> -{
> - struct radeon_sa_bo *sa_bo, *tmp;
> -
> - if (sa_manager->hole->next == &sa_manager->olist)
> - return;
> + struct drm_suballoc *sa = drm_suballoc_new(&sa_manager->base, size,
> + GFP_KERNEL, true, align);
>
> - sa_bo = list_entry(sa_manager->hole->next, struct radeon_sa_bo, olist);
> - list_for_each_entry_safe_from(sa_bo, tmp, &sa_manager->olist, olist) {
> - if (sa_bo->fence == NULL || !radeon_fence_signaled(sa_bo->fence)) {
> - return;
> - }
> - radeon_sa_bo_remove_locked(sa_bo);
> + if (IS_ERR(sa)) {
> + *sa_bo = NULL;
> + return PTR_ERR(sa);
> }
> -}
>
> -static inline unsigned radeon_sa_bo_hole_soffset(struct radeon_sa_manager *sa_manager)
> -{
> - struct list_head *hole = sa_manager->hole;
> -
> - if (hole != &sa_manager->olist) {
> - return list_entry(hole, struct radeon_sa_bo, olist)->eoffset;
> - }
> + *sa_bo = sa;
> return 0;
> }
>
> -static inline unsigned radeon_sa_bo_hole_eoffset(struct radeon_sa_manager *sa_manager)
> -{
> - struct list_head *hole = sa_manager->hole;
> -
> - if (hole->next != &sa_manager->olist) {
> - return list_entry(hole->next, struct radeon_sa_bo, olist)->soffset;
> - }
> - return sa_manager->size;
> -}
> -
> -static bool radeon_sa_bo_try_alloc(struct radeon_sa_manager *sa_manager,
> - struct radeon_sa_bo *sa_bo,
> - unsigned size, unsigned align)
> -{
> - unsigned soffset, eoffset, wasted;
> -
> - soffset = radeon_sa_bo_hole_soffset(sa_manager);
> - eoffset = radeon_sa_bo_hole_eoffset(sa_manager);
> - wasted = (align - (soffset % align)) % align;
> -
> - if ((eoffset - soffset) >= (size + wasted)) {
> - soffset += wasted;
> -
> - sa_bo->manager = sa_manager;
> - sa_bo->soffset = soffset;
> - sa_bo->eoffset = soffset + size;
> - list_add(&sa_bo->olist, sa_manager->hole);
> - INIT_LIST_HEAD(&sa_bo->flist);
> - sa_manager->hole = &sa_bo->olist;
> - return true;
> - }
> - return false;
> -}
> -
> -/**
> - * radeon_sa_event - Check if we can stop waiting
> - *
> - * @sa_manager: pointer to the sa_manager
> - * @size: number of bytes we want to allocate
> - * @align: alignment we need to match
> - *
> - * Check if either there is a fence we can wait for or
> - * enough free memory to satisfy the allocation directly
> - */
> -static bool radeon_sa_event(struct radeon_sa_manager *sa_manager,
> - unsigned size, unsigned align)
> -{
> - unsigned soffset, eoffset, wasted;
> - int i;
> -
> - for (i = 0; i < RADEON_NUM_RINGS; ++i) {
> - if (!list_empty(&sa_manager->flist[i])) {
> - return true;
> - }
> - }
> -
> - soffset = radeon_sa_bo_hole_soffset(sa_manager);
> - eoffset = radeon_sa_bo_hole_eoffset(sa_manager);
> - wasted = (align - (soffset % align)) % align;
> -
> - if ((eoffset - soffset) >= (size + wasted)) {
> - return true;
> - }
> -
> - return false;
> -}
> -
> -static bool radeon_sa_bo_next_hole(struct radeon_sa_manager *sa_manager,
> - struct radeon_fence **fences,
> - unsigned *tries)
> -{
> - struct radeon_sa_bo *best_bo = NULL;
> - unsigned i, soffset, best, tmp;
> -
> - /* if hole points to the end of the buffer */
> - if (sa_manager->hole->next == &sa_manager->olist) {
> - /* try again with its beginning */
> - sa_manager->hole = &sa_manager->olist;
> - return true;
> - }
> -
> - soffset = radeon_sa_bo_hole_soffset(sa_manager);
> - /* to handle wrap around we add sa_manager->size */
> - best = sa_manager->size * 2;
> - /* go over all fence list and try to find the closest sa_bo
> - * of the current last
> - */
> - for (i = 0; i < RADEON_NUM_RINGS; ++i) {
> - struct radeon_sa_bo *sa_bo;
> -
> - fences[i] = NULL;
> -
> - if (list_empty(&sa_manager->flist[i])) {
> - continue;
> - }
> -
> - sa_bo = list_first_entry(&sa_manager->flist[i],
> - struct radeon_sa_bo, flist);
> -
> - if (!radeon_fence_signaled(sa_bo->fence)) {
> - fences[i] = sa_bo->fence;
> - continue;
> - }
> -
> - /* limit the number of tries each ring gets */
> - if (tries[i] > 2) {
> - continue;
> - }
> -
> - tmp = sa_bo->soffset;
> - if (tmp < soffset) {
> - /* wrap around, pretend it's after */
> - tmp += sa_manager->size;
> - }
> - tmp -= soffset;
> - if (tmp < best) {
> - /* this sa bo is the closest one */
> - best = tmp;
> - best_bo = sa_bo;
> - }
> - }
> -
> - if (best_bo) {
> - ++tries[best_bo->fence->ring];
> - sa_manager->hole = best_bo->olist.prev;
> -
> - /* we knew that this one is signaled,
> - so it's save to remote it */
> - radeon_sa_bo_remove_locked(best_bo);
> - return true;
> - }
> - return false;
> -}
> -
> -int radeon_sa_bo_new(struct radeon_device *rdev,
> - struct radeon_sa_manager *sa_manager,
> - struct radeon_sa_bo **sa_bo,
> - unsigned size, unsigned align)
> -{
> - struct radeon_fence *fences[RADEON_NUM_RINGS];
> - unsigned tries[RADEON_NUM_RINGS];
> - int i, r;
> -
> - BUG_ON(align > sa_manager->align);
> - BUG_ON(size > sa_manager->size);
> -
> - *sa_bo = kmalloc(sizeof(struct radeon_sa_bo), GFP_KERNEL);
> - if ((*sa_bo) == NULL) {
> - return -ENOMEM;
> - }
> - (*sa_bo)->manager = sa_manager;
> - (*sa_bo)->fence = NULL;
> - INIT_LIST_HEAD(&(*sa_bo)->olist);
> - INIT_LIST_HEAD(&(*sa_bo)->flist);
> -
> - spin_lock(&sa_manager->wq.lock);
> - do {
> - for (i = 0; i < RADEON_NUM_RINGS; ++i)
> - tries[i] = 0;
> -
> - do {
> - radeon_sa_bo_try_free(sa_manager);
> -
> - if (radeon_sa_bo_try_alloc(sa_manager, *sa_bo,
> - size, align)) {
> - spin_unlock(&sa_manager->wq.lock);
> - return 0;
> - }
> -
> - /* see if we can skip over some allocations */
> - } while (radeon_sa_bo_next_hole(sa_manager, fences, tries));
> -
> - for (i = 0; i < RADEON_NUM_RINGS; ++i)
> - radeon_fence_ref(fences[i]);
> -
> - spin_unlock(&sa_manager->wq.lock);
> - r = radeon_fence_wait_any(rdev, fences, false);
> - for (i = 0; i < RADEON_NUM_RINGS; ++i)
> - radeon_fence_unref(&fences[i]);
> - spin_lock(&sa_manager->wq.lock);
> - /* if we have nothing to wait for block */
> - if (r == -ENOENT) {
> - r = wait_event_interruptible_locked(
> - sa_manager->wq,
> - radeon_sa_event(sa_manager, size, align)
> - );
> - }
> -
> - } while (!r);
> -
> - spin_unlock(&sa_manager->wq.lock);
> - kfree(*sa_bo);
> - *sa_bo = NULL;
> - return r;
> -}
> -
> -void radeon_sa_bo_free(struct radeon_device *rdev, struct radeon_sa_bo **sa_bo,
> +void radeon_sa_bo_free(struct drm_suballoc **sa_bo,
> struct radeon_fence *fence)
> {
> - struct radeon_sa_manager *sa_manager;
> -
> if (sa_bo == NULL || *sa_bo == NULL) {
> return;
> }
>
> - sa_manager = (*sa_bo)->manager;
> - spin_lock(&sa_manager->wq.lock);
> - if (fence && !radeon_fence_signaled(fence)) {
> - (*sa_bo)->fence = radeon_fence_ref(fence);
> - list_add_tail(&(*sa_bo)->flist,
> - &sa_manager->flist[fence->ring]);
> - } else {
> - radeon_sa_bo_remove_locked(*sa_bo);
> - }
> - wake_up_all_locked(&sa_manager->wq);
> - spin_unlock(&sa_manager->wq.lock);
> + if (fence)
> + drm_suballoc_free(*sa_bo, &fence->base);
> + else
> + drm_suballoc_free(*sa_bo, NULL);
> +
> *sa_bo = NULL;
> }
>
> @@ -400,25 +153,8 @@ void radeon_sa_bo_free(struct radeon_device *rdev, struct radeon_sa_bo **sa_bo,
> void radeon_sa_bo_dump_debug_info(struct radeon_sa_manager *sa_manager,
> struct seq_file *m)
> {
> - struct radeon_sa_bo *i;
> + struct drm_printer p = drm_seq_file_printer(m);
>
> - spin_lock(&sa_manager->wq.lock);
> - list_for_each_entry(i, &sa_manager->olist, olist) {
> - uint64_t soffset = i->soffset + sa_manager->gpu_addr;
> - uint64_t eoffset = i->eoffset + sa_manager->gpu_addr;
> - if (&i->olist == sa_manager->hole) {
> - seq_printf(m, ">");
> - } else {
> - seq_printf(m, " ");
> - }
> - seq_printf(m, "[0x%010llx 0x%010llx] size %8lld",
> - soffset, eoffset, eoffset - soffset);
> - if (i->fence) {
> - seq_printf(m, " protected by 0x%016llx on ring %d",
> - i->fence->seq, i->fence->ring);
> - }
> - seq_printf(m, "\n");
> - }
> - spin_unlock(&sa_manager->wq.lock);
> + drm_suballoc_dump_debug_info(&sa_manager->base, &p, sa_manager->gpu_addr);
> }
> #endif
> diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c b/drivers/gpu/drm/radeon/radeon_semaphore.c
> index 221e59476f64..1f0a9a4ff5ae 100644
> --- a/drivers/gpu/drm/radeon/radeon_semaphore.c
> +++ b/drivers/gpu/drm/radeon/radeon_semaphore.c
> @@ -40,7 +40,7 @@ int radeon_semaphore_create(struct radeon_device *rdev,
> if (*semaphore == NULL) {
> return -ENOMEM;
> }
> - r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo,
> + r = radeon_sa_bo_new(&rdev->ring_tmp_bo,
> &(*semaphore)->sa_bo, 8, 8);
> if (r) {
> kfree(*semaphore);
> @@ -100,7 +100,7 @@ void radeon_semaphore_free(struct radeon_device *rdev,
> dev_err(rdev->dev, "semaphore %p has more waiters than signalers,"
> " hardware lockup imminent!\n", *semaphore);
> }
> - radeon_sa_bo_free(rdev, &(*semaphore)->sa_bo, fence);
> + radeon_sa_bo_free(&(*semaphore)->sa_bo, fence);
> kfree(*semaphore);
> *semaphore = NULL;
> }
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [Intel-xe] [PATCH 1/3] drm/suballoc: Extract amdgpu_sa.c as generic suballocation helper
2023-02-23 11:13 ` Christian König
@ 2023-02-23 11:22 ` Thomas Hellström
2023-02-23 11:56 ` Christian König
0 siblings, 1 reply; 12+ messages in thread
From: Thomas Hellström @ 2023-02-23 11:22 UTC (permalink / raw)
To: Christian König, dri-devel
Cc: Daniel Vetter, Maarten Lankhorst, intel-xe, Dave Airlie
On 2/23/23 12:13, Christian König wrote:
> Am 23.02.23 um 11:57 schrieb Thomas Hellström:
>> From: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
>>
>> Suballocating a buffer object is something that is not driver-specific
>> and useful for many drivers.
>>
>> Use a slightly modified version of amdgpu_sa.c
>>
>> v2:
>> - Style cleanups. (Thomas)
>> - Added / Modified documentation (Thomas)
>> - Use u64 for the sizes and offset. The code dates back to 2012 and
>> using unsigned int will probably soon come back to bite us.
>> We can consider size_t as well for better 32-bit efficiency. (Thomas)
>> - Add and document gfp, intr and align arguments to drm_suballoc_new()
>> (Thomas)
>> - Use drm_printer for debug output (Thomas)
>>
>> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
>> Co-developed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
>> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
>> ---
>> drivers/gpu/drm/Kconfig | 4 +
>> drivers/gpu/drm/Makefile | 3 +
>> drivers/gpu/drm/drm_suballoc.c | 457 +++++++++++++++++++++++++++++++++
>> include/drm/drm_suballoc.h | 106 ++++++++
>> 4 files changed, 570 insertions(+)
>> create mode 100644 drivers/gpu/drm/drm_suballoc.c
>> create mode 100644 include/drm/drm_suballoc.h
>>
>> diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
>> index dc0f94f02a82..8fbe57407c60 100644
>> --- a/drivers/gpu/drm/Kconfig
>> +++ b/drivers/gpu/drm/Kconfig
>> @@ -232,6 +232,10 @@ config DRM_GEM_SHMEM_HELPER
>> help
>> Choose this if you need the GEM shmem helper functions
>> +config DRM_SUBALLOC_HELPER
>> + tristate
>> + depends on DRM
>> +
>> config DRM_SCHED
>> tristate
>> depends on DRM
>> diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
>> index ab4460fcd63f..1e04d135e866 100644
>> --- a/drivers/gpu/drm/Makefile
>> +++ b/drivers/gpu/drm/Makefile
>> @@ -88,6 +88,9 @@ obj-$(CONFIG_DRM_GEM_DMA_HELPER) += drm_dma_helper.o
>> drm_shmem_helper-y := drm_gem_shmem_helper.o
>> obj-$(CONFIG_DRM_GEM_SHMEM_HELPER) += drm_shmem_helper.o
>> +drm_suballoc_helper-y := drm_suballoc.o
>> +obj-$(CONFIG_DRM_SUBALLOC_HELPER) += drm_suballoc_helper.o
>> +
>> drm_vram_helper-y := drm_gem_vram_helper.o
>> obj-$(CONFIG_DRM_VRAM_HELPER) += drm_vram_helper.o
>> diff --git a/drivers/gpu/drm/drm_suballoc.c
>> b/drivers/gpu/drm/drm_suballoc.c
>> new file mode 100644
>> index 000000000000..057cd19c44ba
>> --- /dev/null
>> +++ b/drivers/gpu/drm/drm_suballoc.c
>> @@ -0,0 +1,457 @@
>> +// SPDX-License-Identifier: GPL-2.0 OR MIT
>> +/*
>> + * Copyright 2011 Red Hat Inc.
>> + * Copyright 2023 Intel Corporation.
>> + * All Rights Reserved.
>> + *
>> + * Permission is hereby granted, free of charge, to any person
>> obtaining a
>> + * copy of this software and associated documentation files (the
>> + * "Software"), to deal in the Software without restriction, including
>> + * without limitation the rights to use, copy, modify, merge, publish,
>> + * distribute, sub license, and/or sell copies of the Software, and to
>> + * permit persons to whom the Software is furnished to do so,
>> subject to
>> + * the following conditions:
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
>> EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>> MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO
>> EVENT SHALL
>> + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
>> ANY CLAIM,
>> + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
>> TORT OR
>> + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
>> SOFTWARE OR THE
>> + * USE OR OTHER DEALINGS IN THE SOFTWARE.
>> + *
>> + * The above copyright notice and this permission notice (including the
>> + * next paragraph) shall be included in all copies or substantial
>> portions
>> + * of the Software.
>> + *
>> + */
>> +/*
>> + * Authors:
>> + * Jerome Glisse <glisse@freedesktop.org>
>> + */
>
> We should probably update this or just leave it out.
>
You mean the author info? Sure, can leave it out.
> Apart from that Reviewed-by: Christian König <christian.koenig@amd.com>.
Thanks for reviewing. I'll update this and the comment on the amdgpu patch.
Is drm-misc a suitable tree for this or any other preferences?
/Thomas
>
> Regards,
> Christian.
>
>> +/* Algorithm:
>> + *
>> + * We store the last allocated bo in "hole", we always try to allocate
>> + * after the last allocated bo. Principle is that in a linear GPU ring
>> + * progression what is after last is the oldest bo we allocated and thus
>> + * the first one that should no longer be in use by the GPU.
>> + *
>> + * If it's not the case we skip over the bo after last to the closest
>> + * done bo if such one exists. If none exists and we are not asked to
>> + * block we report failure to allocate.
>> + *
>> + * If we are asked to block we wait on the oldest fence of each of the
>> + * rings. We just wait for any of those fences to complete.
>> + */
>> +
>> +#include <drm/drm_suballoc.h>
>> +#include <drm/drm_print.h>
>> +#include <linux/slab.h>
>> +#include <linux/sched.h>
>> +#include <linux/wait.h>
>> +#include <linux/dma-fence.h>
>> +
>> +static void drm_suballoc_remove_locked(struct drm_suballoc *sa);
>> +static void drm_suballoc_try_free(struct drm_suballoc_manager
>> *sa_manager);
>> +
>> +/**
>> + * drm_suballoc_manager_init() - Initialise the drm_suballoc_manager
>> + * @sa_manager: pointer to the sa_manager
>> + * @size: number of bytes we want to suballocate
>> + * @align: alignment for each suballocated chunk
>> + *
>> + * Prepares the suballocation manager for suballocations.
>> + */
>> +void drm_suballoc_manager_init(struct drm_suballoc_manager *sa_manager,
>> + u64 size, u64 align)
>> +{
>> + unsigned int i;
>> +
>> + if (!align)
>> + align = 1;
>> +
>> + /* alignment must be a power of 2 */
>> + if (WARN_ON_ONCE(align & (align - 1)))
>> + align = roundup_pow_of_two(align);
>> +
>> + init_waitqueue_head(&sa_manager->wq);
>> + sa_manager->size = size;
>> + sa_manager->align = align;
>> + sa_manager->hole = &sa_manager->olist;
>> + INIT_LIST_HEAD(&sa_manager->olist);
>> + for (i = 0; i < DRM_SUBALLOC_MAX_QUEUES; ++i)
>> + INIT_LIST_HEAD(&sa_manager->flist[i]);
>> +}
>> +EXPORT_SYMBOL(drm_suballoc_manager_init);
>> +
>> +/**
>> + * drm_suballoc_manager_fini() - Destroy the drm_suballoc_manager
>> + * @sa_manager: pointer to the sa_manager
>> + *
>> + * Cleans up the suballocation manager after use. All fences added
>> + * with drm_suballoc_free() must be signaled, or we cannot clean up
>> + * the entire manager.
>> + */
>> +void drm_suballoc_manager_fini(struct drm_suballoc_manager *sa_manager)
>> +{
>> + struct drm_suballoc *sa, *tmp;
>> +
>> + if (!sa_manager->size)
>> + return;
>> +
>> + if (!list_empty(&sa_manager->olist)) {
>> + sa_manager->hole = &sa_manager->olist;
>> + drm_suballoc_try_free(sa_manager);
>> + if (!list_empty(&sa_manager->olist))
>> + DRM_ERROR("sa_manager is not empty, clearing anyway\n");
>> + }
>> + list_for_each_entry_safe(sa, tmp, &sa_manager->olist, olist) {
>> + drm_suballoc_remove_locked(sa);
>> + }
>> +
>> + sa_manager->size = 0;
>> +}
>> +EXPORT_SYMBOL(drm_suballoc_manager_fini);
>> +
>> +static void drm_suballoc_remove_locked(struct drm_suballoc *sa)
>> +{
>> + struct drm_suballoc_manager *sa_manager = sa->manager;
>> +
>> + if (sa_manager->hole == &sa->olist)
>> + sa_manager->hole = sa->olist.prev;
>> +
>> + list_del_init(&sa->olist);
>> + list_del_init(&sa->flist);
>> + dma_fence_put(sa->fence);
>> + kfree(sa);
>> +}
>> +
>> +static void drm_suballoc_try_free(struct drm_suballoc_manager
>> *sa_manager)
>> +{
>> + struct drm_suballoc *sa, *tmp;
>> +
>> + if (sa_manager->hole->next == &sa_manager->olist)
>> + return;
>> +
>> + sa = list_entry(sa_manager->hole->next, struct drm_suballoc,
>> olist);
>> + list_for_each_entry_safe_from(sa, tmp, &sa_manager->olist, olist) {
>> + if (!sa->fence || !dma_fence_is_signaled(sa->fence))
>> + return;
>> +
>> + drm_suballoc_remove_locked(sa);
>> + }
>> +}
>> +
>> +static u64 drm_suballoc_hole_soffset(struct drm_suballoc_manager
>> *sa_manager)
>> +{
>> + struct list_head *hole = sa_manager->hole;
>> +
>> + if (hole != &sa_manager->olist)
>> + return list_entry(hole, struct drm_suballoc, olist)->eoffset;
>> +
>> + return 0;
>> +}
>> +
>> +static u64 drm_suballoc_hole_eoffset(struct drm_suballoc_manager
>> *sa_manager)
>> +{
>> + struct list_head *hole = sa_manager->hole;
>> +
>> + if (hole->next != &sa_manager->olist)
>> + return list_entry(hole->next, struct drm_suballoc,
>> olist)->soffset;
>> + return sa_manager->size;
>> +}
>> +
>> +static bool drm_suballoc_try_alloc(struct drm_suballoc_manager
>> *sa_manager,
>> + struct drm_suballoc *sa,
>> + u64 size, u64 align)
>> +{
>> + u64 soffset, eoffset, wasted;
>> +
>> + soffset = drm_suballoc_hole_soffset(sa_manager);
>> + eoffset = drm_suballoc_hole_eoffset(sa_manager);
>> + wasted = (align - (soffset % align)) % align;
>> +
>> + if ((eoffset - soffset) >= (size + wasted)) {
>> + soffset += wasted;
>> +
>> + sa->manager = sa_manager;
>> + sa->soffset = soffset;
>> + sa->eoffset = soffset + size;
>> + list_add(&sa->olist, sa_manager->hole);
>> + INIT_LIST_HEAD(&sa->flist);
>> + sa_manager->hole = &sa->olist;
>> + return true;
>> + }
>> + return false;
>> +}
>> +
>> +static bool __drm_suballoc_event(struct drm_suballoc_manager
>> *sa_manager,
>> + u64 size, u64 align)
>> +{
>> + u64 soffset, eoffset, wasted;
>> + unsigned int i;
>> +
>> + for (i = 0; i < DRM_SUBALLOC_MAX_QUEUES; ++i)
>> + if (!list_empty(&sa_manager->flist[i]))
>> + return true;
>> +
>> + soffset = drm_suballoc_hole_soffset(sa_manager);
>> + eoffset = drm_suballoc_hole_eoffset(sa_manager);
>> + wasted = (align - (soffset % align)) % align;
>> +
>> + return ((eoffset - soffset) >= (size + wasted));
>> +}
>> +
>> +/**
>> + * drm_suballoc_event() - Check if we can stop waiting
>> + * @sa_manager: pointer to the sa_manager
>> + * @size: number of bytes we want to allocate
>> + * @align: alignment we need to match
>> + *
>> + * Return: true if either there is a fence we can wait for or
>> + * enough free memory to satisfy the allocation directly.
>> + * false otherwise.
>> + */
>> +static bool drm_suballoc_event(struct drm_suballoc_manager *sa_manager,
>> + u64 size, u64 align)
>> +{
>> + bool ret;
>> +
>> + spin_lock(&sa_manager->wq.lock);
>> + ret = __drm_suballoc_event(sa_manager, size, align);
>> + spin_unlock(&sa_manager->wq.lock);
>> + return ret;
>> +}
>> +
>> +static bool drm_suballoc_next_hole(struct drm_suballoc_manager
>> *sa_manager,
>> + struct dma_fence **fences,
>> + unsigned int *tries)
>> +{
>> + struct drm_suballoc *best_bo = NULL;
>> + unsigned int i, best_idx;
>> + u64 soffset, best, tmp;
>> +
>> + /* if hole points to the end of the buffer */
>> + if (sa_manager->hole->next == &sa_manager->olist) {
>> + /* try again with its beginning */
>> + sa_manager->hole = &sa_manager->olist;
>> + return true;
>> + }
>> +
>> + soffset = drm_suballoc_hole_soffset(sa_manager);
>> + /* to handle wrap around we add sa_manager->size */
>> + best = sa_manager->size * 2;
>> + /* go over all fence list and try to find the closest sa
>> + * of the current last
>> + */
>> + for (i = 0; i < DRM_SUBALLOC_MAX_QUEUES; ++i) {
>> + struct drm_suballoc *sa;
>> +
>> + fences[i] = NULL;
>> +
>> + if (list_empty(&sa_manager->flist[i]))
>> + continue;
>> +
>> + sa = list_first_entry(&sa_manager->flist[i],
>> + struct drm_suballoc, flist);
>> +
>> + if (!dma_fence_is_signaled(sa->fence)) {
>> + fences[i] = sa->fence;
>> + continue;
>> + }
>> +
>> + /* limit the number of tries each freelist gets */
>> + if (tries[i] > 2)
>> + continue;
>> +
>> + tmp = sa->soffset;
>> + if (tmp < soffset) {
>> + /* wrap around, pretend it's after */
>> + tmp += sa_manager->size;
>> + }
>> + tmp -= soffset;
>> + if (tmp < best) {
>> + /* this sa bo is the closest one */
>> + best = tmp;
>> + best_idx = i;
>> + best_bo = sa;
>> + }
>> + }
>> +
>> + if (best_bo) {
>> + ++tries[best_idx];
>> + sa_manager->hole = best_bo->olist.prev;
>> +
>> + /*
>> + * We know that this one is signaled,
>> + * so it's safe to remove it.
>> + */
>> + drm_suballoc_remove_locked(best_bo);
>> + return true;
>> + }
>> + return false;
>> +}
>> +
>> +/**
>> + * drm_suballoc_new() - Make a suballocation.
>> + * @sa_manager: pointer to the sa_manager
>> + * @size: number of bytes we want to suballocate.
>> + * @gfp: gfp flags used for memory allocation. Typically GFP_KERNEL but
>> + * the argument is provided for suballocations from reclaim
>> context or
>> + * where the caller wants to avoid pipelining rather than wait
>> for
>> + * reclaim.
>> + * @intr: Whether to perform waits interruptible. This should typically
>> + * always be true, unless the caller needs to propagate a
>> + * non-interruptible context from above layers.
>> + * @align: Alignment. Must not exceed the default manager alignment.
>> + * If @align is zero, then the manager alignment is used.
>> + *
>> + * Try to make a suballocation of size @size, which will be rounded
>> + * up to the alignment specified in
>> drm_suballoc_manager_init().
>> + *
>> + * Return: a new suballocated bo, or an ERR_PTR.
>> + */
>> +struct drm_suballoc *
>> +drm_suballoc_new(struct drm_suballoc_manager *sa_manager, u64 size,
>> + gfp_t gfp, bool intr, u64 align)
>> +{
>> + struct dma_fence *fences[DRM_SUBALLOC_MAX_QUEUES];
>> + unsigned int tries[DRM_SUBALLOC_MAX_QUEUES];
>> + unsigned int count;
>> + int i, r;
>> + struct drm_suballoc *sa;
>> +
>> + if (WARN_ON_ONCE(align > sa_manager->align))
>> + return ERR_PTR(-EINVAL);
>> + if (WARN_ON_ONCE(size > sa_manager->size || !size))
>> + return ERR_PTR(-EINVAL);
>> +
>> + if (!align)
>> + align = sa_manager->align;
>> +
>> + sa = kmalloc(sizeof(*sa), gfp);
>> + if (!sa)
>> + return ERR_PTR(-ENOMEM);
>> + sa->manager = sa_manager;
>> + sa->fence = NULL;
>> + INIT_LIST_HEAD(&sa->olist);
>> + INIT_LIST_HEAD(&sa->flist);
>> +
>> + spin_lock(&sa_manager->wq.lock);
>> + do {
>> + for (i = 0; i < DRM_SUBALLOC_MAX_QUEUES; ++i)
>> + tries[i] = 0;
>> +
>> + do {
>> + drm_suballoc_try_free(sa_manager);
>> +
>> + if (drm_suballoc_try_alloc(sa_manager, sa,
>> + size, align)) {
>> + spin_unlock(&sa_manager->wq.lock);
>> + return sa;
>> + }
>> +
>> + /* see if we can skip over some allocations */
>> + } while (drm_suballoc_next_hole(sa_manager, fences, tries));
>> +
>> + for (i = 0, count = 0; i < DRM_SUBALLOC_MAX_QUEUES; ++i)
>> + if (fences[i])
>> + fences[count++] = dma_fence_get(fences[i]);
>> +
>> + if (count) {
>> + long t;
>> +
>> + spin_unlock(&sa_manager->wq.lock);
>> + t = dma_fence_wait_any_timeout(fences, count, intr,
>> + MAX_SCHEDULE_TIMEOUT,
>> + NULL);
>> + for (i = 0; i < count; ++i)
>> + dma_fence_put(fences[i]);
>> +
>> + r = (t > 0) ? 0 : t;
>> + spin_lock(&sa_manager->wq.lock);
>> + } else if (intr) {
>> + /* if we have nothing to wait for block */
>> + r = wait_event_interruptible_locked
>> + (sa_manager->wq,
>> + __drm_suballoc_event(sa_manager, size, align));
>> + } else {
>> + spin_unlock(&sa_manager->wq.lock);
>> + wait_event(sa_manager->wq,
>> + drm_suballoc_event(sa_manager, size, align));
>> + r = 0;
>> + spin_lock(&sa_manager->wq.lock);
>> + }
>> + } while (!r);
>> +
>> + spin_unlock(&sa_manager->wq.lock);
>> + kfree(sa);
>> + return ERR_PTR(r);
>> +}
>> +EXPORT_SYMBOL(drm_suballoc_new);
>> +
>> +/**
>> + * drm_suballoc_free - Free a suballocation
>> + * @suballoc: pointer to the suballocation
>> + * @fence: fence that signals when suballocation is idle
>> + *
>> + * Free the suballocation. The suballocation can be re-used after
>> @fence signals.
>> + */
>> +void drm_suballoc_free(struct drm_suballoc *suballoc,
>> + struct dma_fence *fence)
>> +{
>> + struct drm_suballoc_manager *sa_manager;
>> +
>> + if (!suballoc)
>> + return;
>> +
>> + sa_manager = suballoc->manager;
>> +
>> + spin_lock(&sa_manager->wq.lock);
>> + if (fence && !dma_fence_is_signaled(fence)) {
>> + u64 idx;
>> +
>> + suballoc->fence = dma_fence_get(fence);
>> + idx = fence->context % DRM_SUBALLOC_MAX_QUEUES;
>> + list_add_tail(&suballoc->flist, &sa_manager->flist[idx]);
>> + } else {
>> + drm_suballoc_remove_locked(suballoc);
>> + }
>> + wake_up_all_locked(&sa_manager->wq);
>> + spin_unlock(&sa_manager->wq.lock);
>> +}
>> +EXPORT_SYMBOL(drm_suballoc_free);
>> +
>> +#ifdef CONFIG_DEBUG_FS
>> +void drm_suballoc_dump_debug_info(struct drm_suballoc_manager
>> *sa_manager,
>> + struct drm_printer *p, u64 suballoc_base)
>> +{
>> + struct drm_suballoc *i;
>> +
>> + spin_lock(&sa_manager->wq.lock);
>> + list_for_each_entry(i, &sa_manager->olist, olist) {
>> + u64 soffset = i->soffset;
>> + u64 eoffset = i->eoffset;
>> +
>> + if (&i->olist == sa_manager->hole)
>> + drm_puts(p, ">");
>> + else
>> + drm_puts(p, " ");
>> +
>> + drm_printf(p, "[0x%010llx 0x%010llx] size %8lld",
>> + suballoc_base + soffset, suballoc_base + eoffset,
>> + eoffset - soffset);
>> +
>> + if (i->fence)
>> + drm_printf(p, " protected by 0x%016llx on context %llu",
>> + i->fence->seqno, i->fence->context);
>> +
>> + drm_puts(p, "\n");
>> + }
>> + spin_unlock(&sa_manager->wq.lock);
>> +}
>> +EXPORT_SYMBOL(drm_suballoc_dump_debug_info);
>> +#endif
>> +MODULE_AUTHOR("Multiple");
>> +MODULE_DESCRIPTION("Range suballocator helper");
>> +MODULE_LICENSE("Dual MIT/GPL");
>> diff --git a/include/drm/drm_suballoc.h b/include/drm/drm_suballoc.h
>> new file mode 100644
>> index 000000000000..a737f996e5ff
>> --- /dev/null
>> +++ b/include/drm/drm_suballoc.h
>> @@ -0,0 +1,106 @@
>> +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
>> +/*
>> + * Copyright 2011 Red Hat Inc.
>> + * Copyright © 2022 Intel Corporation
>> + */
>> +#ifndef _DRM_SUBALLOC_H_
>> +#define _DRM_SUBALLOC_H_
>> +
>> +#include <drm/drm_mm.h>
>> +
>> +#include <linux/dma-fence.h>
>> +#include <linux/types.h>
>> +
>> +#define DRM_SUBALLOC_MAX_QUEUES 32
>> +/**
>> + * struct drm_suballoc_manager - fenced range allocations
>> + * @wq: Wait queue for sleeping allocations on contention.
>> + * @hole: Pointer to first hole node.
>> + * @olist: List of allocated ranges.
>> + * @flist: Array[fence context hash] of queues of fenced allocated
>> ranges.
>> + * @size: Size of the managed range.
>> + * @align: Default alignment for the managed range.
>> + */
>> +struct drm_suballoc_manager {
>> + wait_queue_head_t wq;
>> + struct list_head *hole;
>> + struct list_head olist;
>> + struct list_head flist[DRM_SUBALLOC_MAX_QUEUES];
>> + u64 size;
>> + u64 align;
>> +};
>> +
>> +/**
>> + * struct drm_suballoc - Sub-allocated range
>> + * @olist: List link for list of allocated ranges.
>> + * @flist: List link for the manager fenced allocated ranges queues.
>> + * @manager: The drm_suballoc_manager.
>> + * @soffset: Start offset.
>> + * @eoffset: End offset + 1 so that @eoffset - @soffset = size.
>> + * @fence: The fence protecting the allocation.
>> + */
>> +struct drm_suballoc {
>> + struct list_head olist;
>> + struct list_head flist;
>> + struct drm_suballoc_manager *manager;
>> + u64 soffset;
>> + u64 eoffset;
>> + struct dma_fence *fence;
>> +};
>> +
>> +void drm_suballoc_manager_init(struct drm_suballoc_manager *sa_manager,
>> + u64 size, u64 align);
>> +
>> +void drm_suballoc_manager_fini(struct drm_suballoc_manager
>> *sa_manager);
>> +
>> +struct drm_suballoc *
>> +drm_suballoc_new(struct drm_suballoc_manager *sa_manager, u64 size,
>> gfp_t gfp,
>> + bool intr, u64 align);
>> +
>> +void drm_suballoc_free(struct drm_suballoc *sa, struct dma_fence
>> *fence);
>> +
>> +/**
>> + * drm_suballoc_soffset - Range start.
>> + * @sa: The struct drm_suballoc.
>> + *
>> + * Return: The start of the allocated range.
>> + */
>> +static inline u64 drm_suballoc_soffset(struct drm_suballoc *sa)
>> +{
>> + return sa->soffset;
>> +}
>> +
>> +/**
>> + * drm_suballoc_eoffset - Range end.
>> + * @sa: The struct drm_suballoc.
>> + *
>> + * Return: The end of the allocated range + 1.
>> + */
>> +static inline u64 drm_suballoc_eoffset(struct drm_suballoc *sa)
>> +{
>> + return sa->eoffset;
>> +}
>> +
>> +/**
>> + * drm_suballoc_size - Range size.
>> + * @sa: The struct drm_suballoc.
>> + *
>> + * Return: The size of the allocated range.
>> + */
>> +static inline u64 drm_suballoc_size(struct drm_suballoc *sa)
>> +{
>> + return sa->eoffset - sa->soffset;
>> +}
>> +
>> +#ifdef CONFIG_DEBUG_FS
>> +void drm_suballoc_dump_debug_info(struct drm_suballoc_manager
>> *sa_manager,
>> + struct drm_printer *p, u64 suballoc_base);
>> +#else
>> +static inline void
>> +drm_suballoc_dump_debug_info(struct drm_suballoc_manager *sa_manager,
>> + struct drm_printer *p, u64 suballoc_base)
>> +{ }
>> +
>> +#endif
>> +
>> +#endif /* _DRM_SUBALLOC_H_ */
>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [Intel-xe] [PATCH 1/3] drm/suballoc: Extract amdgpu_sa.c as generic suballocation helper
2023-02-23 11:22 ` Thomas Hellström
@ 2023-02-23 11:56 ` Christian König
0 siblings, 0 replies; 12+ messages in thread
From: Christian König @ 2023-02-23 11:56 UTC (permalink / raw)
To: Thomas Hellström, dri-devel
Cc: Daniel Vetter, Maarten Lankhorst, intel-xe, Dave Airlie
Am 23.02.23 um 12:22 schrieb Thomas Hellström:
> On 2/23/23 12:13, Christian König wrote:
>> Am 23.02.23 um 11:57 schrieb Thomas Hellström:
>>> From: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
>>>
>>> Suballocating a buffer object is something that is not driver-specific
>>> and useful for many drivers.
>>>
>>> Use a slightly modified version of amdgpu_sa.c
>>>
>>> v2:
>>> - Style cleanups. (Thomas)
>>> - Added / Modified documentation (Thomas)
>>> - Use u64 for the sizes and offset. The code dates back to 2012 and
>>> using unsigned int will probably soon come back to bite us.
>>> We can consider size_t as well for better 32-bit efficiency.
>>> (Thomas)
>>> - Add and document gfp, intr and align arguments to drm_suballoc_new()
>>> (Thomas)
>>> - Use drm_printer for debug output (Thomas)
>>>
>>> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
>>> Co-developed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
>>> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
>>> ---
>>> drivers/gpu/drm/Kconfig | 4 +
>>> drivers/gpu/drm/Makefile | 3 +
>>> drivers/gpu/drm/drm_suballoc.c | 457
>>> +++++++++++++++++++++++++++++++++
>>> include/drm/drm_suballoc.h | 106 ++++++++
>>> 4 files changed, 570 insertions(+)
>>> create mode 100644 drivers/gpu/drm/drm_suballoc.c
>>> create mode 100644 include/drm/drm_suballoc.h
>>>
>>> diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
>>> index dc0f94f02a82..8fbe57407c60 100644
>>> --- a/drivers/gpu/drm/Kconfig
>>> +++ b/drivers/gpu/drm/Kconfig
>>> @@ -232,6 +232,10 @@ config DRM_GEM_SHMEM_HELPER
>>> help
>>> Choose this if you need the GEM shmem helper functions
>>> +config DRM_SUBALLOC_HELPER
>>> + tristate
>>> + depends on DRM
>>> +
>>> config DRM_SCHED
>>> tristate
>>> depends on DRM
>>> diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
>>> index ab4460fcd63f..1e04d135e866 100644
>>> --- a/drivers/gpu/drm/Makefile
>>> +++ b/drivers/gpu/drm/Makefile
>>> @@ -88,6 +88,9 @@ obj-$(CONFIG_DRM_GEM_DMA_HELPER) += drm_dma_helper.o
>>> drm_shmem_helper-y := drm_gem_shmem_helper.o
>>> obj-$(CONFIG_DRM_GEM_SHMEM_HELPER) += drm_shmem_helper.o
>>> +drm_suballoc_helper-y := drm_suballoc.o
>>> +obj-$(CONFIG_DRM_SUBALLOC_HELPER) += drm_suballoc_helper.o
>>> +
>>> drm_vram_helper-y := drm_gem_vram_helper.o
>>> obj-$(CONFIG_DRM_VRAM_HELPER) += drm_vram_helper.o
>>> diff --git a/drivers/gpu/drm/drm_suballoc.c
>>> b/drivers/gpu/drm/drm_suballoc.c
>>> new file mode 100644
>>> index 000000000000..057cd19c44ba
>>> --- /dev/null
>>> +++ b/drivers/gpu/drm/drm_suballoc.c
>>> @@ -0,0 +1,457 @@
>>> +// SPDX-License-Identifier: GPL-2.0 OR MIT
>>> +/*
>>> + * Copyright 2011 Red Hat Inc.
>>> + * Copyright 2023 Intel Corporation.
>>> + * All Rights Reserved.
>>> + *
>>> + * Permission is hereby granted, free of charge, to any person
>>> obtaining a
>>> + * copy of this software and associated documentation files (the
>>> + * "Software"), to deal in the Software without restriction, including
>>> + * without limitation the rights to use, copy, modify, merge, publish,
>>> + * distribute, sub license, and/or sell copies of the Software, and to
>>> + * permit persons to whom the Software is furnished to do so,
>>> subject to
>>> + * the following conditions:
>>> + *
>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
>>> EXPRESS OR
>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>>> MERCHANTABILITY,
>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO
>>> EVENT SHALL
>>> + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE
>>> FOR ANY CLAIM,
>>> + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
>>> TORT OR
>>> + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
>>> SOFTWARE OR THE
>>> + * USE OR OTHER DEALINGS IN THE SOFTWARE.
>>> + *
>>> + * The above copyright notice and this permission notice (including
>>> the
>>> + * next paragraph) shall be included in all copies or substantial
>>> portions
>>> + * of the Software.
>>> + *
>>> + */
>>> +/*
>>> + * Authors:
>>> + * Jerome Glisse <glisse@freedesktop.org>
>>> + */
>>
>> We should probably update this or just leave it out.
>>
> You mean the author info? Sure, can leave it out.
>
>> Apart from that Reviewed-by: Christian König <christian.koenig@amd.com>.
>
> Thanks for reviewing. I'll update this and the comment on the amdgpu
> patch.
>
> Is drm-misc a suitable tree for this or any other preferences?
drm-misc-next sounds perfectly fine to me for stuff like this.
Christian.
>
> /Thomas
>
>>
>> Regards,
>> Christian.
>>
>>> +/* Algorithm:
>>> + *
>>> + * We store the last allocated bo in "hole", we always try to allocate
>>> + * after the last allocated bo. Principle is that in a linear GPU ring
>>> + * progression what is after last is the oldest bo we allocated and
>>> thus
>>> + * the first one that should no longer be in use by the GPU.
>>> + *
>>> + * If it's not the case we skip over the bo after last to the closest
>>> + * done bo if such one exist. If none exist and we are not asked to
>>> + * block we report failure to allocate.
>>> + *
>>> + * If we are asked to block we wait on all the oldest fences of all
>>> + * rings. We just wait for any of those fences to complete.
>>> + */
>>> +
>>> +#include <drm/drm_suballoc.h>
>>> +#include <drm/drm_print.h>
>>> +#include <linux/slab.h>
>>> +#include <linux/sched.h>
>>> +#include <linux/wait.h>
>>> +#include <linux/dma-fence.h>
>>> +
>>> +static void drm_suballoc_remove_locked(struct drm_suballoc *sa);
>>> +static void drm_suballoc_try_free(struct drm_suballoc_manager
>>> *sa_manager);
>>> +
>>> +/**
>>> + * drm_suballoc_manager_init() - Initialise the drm_suballoc_manager
>>> + * @sa_manager: pointer to the sa_manager
>>> + * @size: number of bytes we want to suballocate
>>> + * @align: alignment for each suballocated chunk
>>> + *
>>> + * Prepares the suballocation manager for suballocations.
>>> + */
>>> +void drm_suballoc_manager_init(struct drm_suballoc_manager
>>> *sa_manager,
>>> + u64 size, u64 align)
>>> +{
>>> + unsigned int i;
>>> +
>>> + if (!align)
>>> + align = 1;
>>> +
>>> + /* alignment must be a power of 2 */
>>> + if (WARN_ON_ONCE(align & (align - 1)))
>>> + align = roundup_pow_of_two(align);
>>> +
>>> + init_waitqueue_head(&sa_manager->wq);
>>> + sa_manager->size = size;
>>> + sa_manager->align = align;
>>> + sa_manager->hole = &sa_manager->olist;
>>> + INIT_LIST_HEAD(&sa_manager->olist);
>>> + for (i = 0; i < DRM_SUBALLOC_MAX_QUEUES; ++i)
>>> + INIT_LIST_HEAD(&sa_manager->flist[i]);
>>> +}
>>> +EXPORT_SYMBOL(drm_suballoc_manager_init);
>>> +
>>> +/**
>>> + * drm_suballoc_manager_fini() - Destroy the drm_suballoc_manager
>>> + * @sa_manager: pointer to the sa_manager
>>> + *
>>> + * Cleans up the suballocation manager after use. All fences added
>>> + * with drm_suballoc_free() must be signaled, or we cannot clean up
>>> + * the entire manager.
>>> + */
>>> +void drm_suballoc_manager_fini(struct drm_suballoc_manager
>>> *sa_manager)
>>> +{
>>> + struct drm_suballoc *sa, *tmp;
>>> +
>>> + if (!sa_manager->size)
>>> + return;
>>> +
>>> + if (!list_empty(&sa_manager->olist)) {
>>> + sa_manager->hole = &sa_manager->olist;
>>> + drm_suballoc_try_free(sa_manager);
>>> + if (!list_empty(&sa_manager->olist))
>>> + DRM_ERROR("sa_manager is not empty, clearing anyway\n");
>>> + }
>>> + list_for_each_entry_safe(sa, tmp, &sa_manager->olist, olist) {
>>> + drm_suballoc_remove_locked(sa);
>>> + }
>>> +
>>> + sa_manager->size = 0;
>>> +}
>>> +EXPORT_SYMBOL(drm_suballoc_manager_fini);
>>> +
>>> +static void drm_suballoc_remove_locked(struct drm_suballoc *sa)
>>> +{
>>> + struct drm_suballoc_manager *sa_manager = sa->manager;
>>> +
>>> + if (sa_manager->hole == &sa->olist)
>>> + sa_manager->hole = sa->olist.prev;
>>> +
>>> + list_del_init(&sa->olist);
>>> + list_del_init(&sa->flist);
>>> + dma_fence_put(sa->fence);
>>> + kfree(sa);
>>> +}
>>> +
>>> +static void drm_suballoc_try_free(struct drm_suballoc_manager
>>> *sa_manager)
>>> +{
>>> + struct drm_suballoc *sa, *tmp;
>>> +
>>> + if (sa_manager->hole->next == &sa_manager->olist)
>>> + return;
>>> +
>>> + sa = list_entry(sa_manager->hole->next, struct drm_suballoc,
>>> olist);
>>> + list_for_each_entry_safe_from(sa, tmp, &sa_manager->olist,
>>> olist) {
>>> + if (!sa->fence || !dma_fence_is_signaled(sa->fence))
>>> + return;
>>> +
>>> + drm_suballoc_remove_locked(sa);
>>> + }
>>> +}
>>> +
>>> +static u64 drm_suballoc_hole_soffset(struct drm_suballoc_manager
>>> *sa_manager)
>>> +{
>>> + struct list_head *hole = sa_manager->hole;
>>> +
>>> + if (hole != &sa_manager->olist)
>>> + return list_entry(hole, struct drm_suballoc, olist)->eoffset;
>>> +
>>> + return 0;
>>> +}
>>> +
>>> +static u64 drm_suballoc_hole_eoffset(struct drm_suballoc_manager
>>> *sa_manager)
>>> +{
>>> + struct list_head *hole = sa_manager->hole;
>>> +
>>> + if (hole->next != &sa_manager->olist)
>>> + return list_entry(hole->next, struct drm_suballoc,
>>> olist)->soffset;
>>> + return sa_manager->size;
>>> +}
>>> +
>>> +static bool drm_suballoc_try_alloc(struct drm_suballoc_manager
>>> *sa_manager,
>>> + struct drm_suballoc *sa,
>>> + u64 size, u64 align)
>>> +{
>>> + u64 soffset, eoffset, wasted;
>>> +
>>> + soffset = drm_suballoc_hole_soffset(sa_manager);
>>> + eoffset = drm_suballoc_hole_eoffset(sa_manager);
>>> + wasted = (align - (soffset % align)) % align;
>>> +
>>> + if ((eoffset - soffset) >= (size + wasted)) {
>>> + soffset += wasted;
>>> +
>>> + sa->manager = sa_manager;
>>> + sa->soffset = soffset;
>>> + sa->eoffset = soffset + size;
>>> + list_add(&sa->olist, sa_manager->hole);
>>> + INIT_LIST_HEAD(&sa->flist);
>>> + sa_manager->hole = &sa->olist;
>>> + return true;
>>> + }
>>> + return false;
>>> +}
>>> +
>>> +static bool __drm_suballoc_event(struct drm_suballoc_manager
>>> *sa_manager,
>>> + u64 size, u64 align)
>>> +{
>>> + u64 soffset, eoffset, wasted;
>>> + unsigned int i;
>>> +
>>> + for (i = 0; i < DRM_SUBALLOC_MAX_QUEUES; ++i)
>>> + if (!list_empty(&sa_manager->flist[i]))
>>> + return true;
>>> +
>>> + soffset = drm_suballoc_hole_soffset(sa_manager);
>>> + eoffset = drm_suballoc_hole_eoffset(sa_manager);
>>> + wasted = (align - (soffset % align)) % align;
>>> +
>>> + return ((eoffset - soffset) >= (size + wasted));
>>> +}
>>> +
>>> +/**
>>> + * drm_suballoc_event() - Check if we can stop waiting
>>> + * @sa_manager: pointer to the sa_manager
>>> + * @size: number of bytes we want to allocate
>>> + * @align: alignment we need to match
>>> + *
>>> + * Return: true if either there is a fence we can wait for or
>>> + * enough free memory to satisfy the allocation directly.
>>> + * false otherwise.
>>> + */
>>> +static bool drm_suballoc_event(struct drm_suballoc_manager
>>> *sa_manager,
>>> + u64 size, u64 align)
>>> +{
>>> + bool ret;
>>> +
>>> + spin_lock(&sa_manager->wq.lock);
>>> + ret = __drm_suballoc_event(sa_manager, size, align);
>>> + spin_unlock(&sa_manager->wq.lock);
>>> + return ret;
>>> +}
>>> +
>>> +static bool drm_suballoc_next_hole(struct drm_suballoc_manager
>>> *sa_manager,
>>> + struct dma_fence **fences,
>>> + unsigned int *tries)
>>> +{
>>> + struct drm_suballoc *best_bo = NULL;
>>> + unsigned int i, best_idx;
>>> + u64 soffset, best, tmp;
>>> +
>>> + /* if hole points to the end of the buffer */
>>> + if (sa_manager->hole->next == &sa_manager->olist) {
>>> + /* try again with its beginning */
>>> + sa_manager->hole = &sa_manager->olist;
>>> + return true;
>>> + }
>>> +
>>> + soffset = drm_suballoc_hole_soffset(sa_manager);
>>> + /* to handle wrap around we add sa_manager->size */
>>> + best = sa_manager->size * 2;
>>> + /* go over all fence list and try to find the closest sa
>>> + * of the current last
>>> + */
>>> + for (i = 0; i < DRM_SUBALLOC_MAX_QUEUES; ++i) {
>>> + struct drm_suballoc *sa;
>>> +
>>> + fences[i] = NULL;
>>> +
>>> + if (list_empty(&sa_manager->flist[i]))
>>> + continue;
>>> +
>>> + sa = list_first_entry(&sa_manager->flist[i],
>>> + struct drm_suballoc, flist);
>>> +
>>> + if (!dma_fence_is_signaled(sa->fence)) {
>>> + fences[i] = sa->fence;
>>> + continue;
>>> + }
>>> +
>>> + /* limit the number of tries each freelist gets */
>>> + if (tries[i] > 2)
>>> + continue;
>>> +
>>> + tmp = sa->soffset;
>>> + if (tmp < soffset) {
>>> + /* wrap around, pretend it's after */
>>> + tmp += sa_manager->size;
>>> + }
>>> + tmp -= soffset;
>>> + if (tmp < best) {
>>> + /* this sa bo is the closest one */
>>> + best = tmp;
>>> + best_idx = i;
>>> + best_bo = sa;
>>> + }
>>> + }
>>> +
>>> + if (best_bo) {
>>> + ++tries[best_idx];
>>> + sa_manager->hole = best_bo->olist.prev;
>>> +
>>> + /*
>>> + * We know that this one is signaled,
>>> + * so it's safe to remove it.
>>> + */
>>> + drm_suballoc_remove_locked(best_bo);
>>> + return true;
>>> + }
>>> + return false;
>>> +}
>>> +
>>> +/**
>>> + * drm_suballoc_new() - Make a suballocation.
>>> + * @sa_manager: pointer to the sa_manager
>>> + * @size: number of bytes we want to suballocate.
>>> + * @gfp: gfp flags used for memory allocation. Typically GFP_KERNEL
>>> but
>>> + * the argument is provided for suballocations from reclaim
>>> context or
>>> + * where the caller wants to avoid pipelining rather than
>>> wait for
>>> + * reclaim.
>>> + * @intr: Whether to perform waits interruptible. This should
>>> typically
>>> + * always be true, unless the caller needs to propagate a
>>> + * non-interruptible context from above layers.
>>> + * @align: Alignment. Must not exceed the default manager alignment.
>>> + * If @align is zero, then the manager alignment is used.
>>> + *
>>> + * Try to make a suballocation of size @size, which will be rounded
>>> + * up to the alignment specified in
>>> drm_suballoc_manager_init().
>>> + *
>>> + * Return: a new suballocated bo, or an ERR_PTR.
>>> + */
>>> +struct drm_suballoc *
>>> +drm_suballoc_new(struct drm_suballoc_manager *sa_manager, u64 size,
>>> + gfp_t gfp, bool intr, u64 align)
>>> +{
>>> + struct dma_fence *fences[DRM_SUBALLOC_MAX_QUEUES];
>>> + unsigned int tries[DRM_SUBALLOC_MAX_QUEUES];
>>> + unsigned int count;
>>> + int i, r;
>>> + struct drm_suballoc *sa;
>>> +
>>> + if (WARN_ON_ONCE(align > sa_manager->align))
>>> + return ERR_PTR(-EINVAL);
>>> + if (WARN_ON_ONCE(size > sa_manager->size || !size))
>>> + return ERR_PTR(-EINVAL);
>>> +
>>> + if (!align)
>>> + align = sa_manager->align;
>>> +
>>> + sa = kmalloc(sizeof(*sa), gfp);
>>> + if (!sa)
>>> + return ERR_PTR(-ENOMEM);
>>> + sa->manager = sa_manager;
>>> + sa->fence = NULL;
>>> + INIT_LIST_HEAD(&sa->olist);
>>> + INIT_LIST_HEAD(&sa->flist);
>>> +
>>> + spin_lock(&sa_manager->wq.lock);
>>> + do {
>>> + for (i = 0; i < DRM_SUBALLOC_MAX_QUEUES; ++i)
>>> + tries[i] = 0;
>>> +
>>> + do {
>>> + drm_suballoc_try_free(sa_manager);
>>> +
>>> + if (drm_suballoc_try_alloc(sa_manager, sa,
>>> + size, align)) {
>>> + spin_unlock(&sa_manager->wq.lock);
>>> + return sa;
>>> + }
>>> +
>>> + /* see if we can skip over some allocations */
>>> + } while (drm_suballoc_next_hole(sa_manager, fences, tries));
>>> +
>>> + for (i = 0, count = 0; i < DRM_SUBALLOC_MAX_QUEUES; ++i)
>>> + if (fences[i])
>>> + fences[count++] = dma_fence_get(fences[i]);
>>> +
>>> + if (count) {
>>> + long t;
>>> +
>>> + spin_unlock(&sa_manager->wq.lock);
>>> + t = dma_fence_wait_any_timeout(fences, count, intr,
>>> + MAX_SCHEDULE_TIMEOUT,
>>> + NULL);
>>> + for (i = 0; i < count; ++i)
>>> + dma_fence_put(fences[i]);
>>> +
>>> + r = (t > 0) ? 0 : t;
>>> + spin_lock(&sa_manager->wq.lock);
>>> + } else if (intr) {
>>> + /* if we have nothing to wait for block */
>>> + r = wait_event_interruptible_locked
>>> + (sa_manager->wq,
>>> + __drm_suballoc_event(sa_manager, size, align));
>>> + } else {
>>> + spin_unlock(&sa_manager->wq.lock);
>>> + wait_event(sa_manager->wq,
>>> + drm_suballoc_event(sa_manager, size, align));
>>> + r = 0;
>>> + spin_lock(&sa_manager->wq.lock);
>>> + }
>>> + } while (!r);
>>> +
>>> + spin_unlock(&sa_manager->wq.lock);
>>> + kfree(sa);
>>> + return ERR_PTR(r);
>>> +}
>>> +EXPORT_SYMBOL(drm_suballoc_new);
>>> +
>>> +/**
>>> + * drm_suballoc_free - Free a suballocation
>>> + * @suballoc: pointer to the suballocation
>>> + * @fence: fence that signals when suballocation is idle
>>> + *
>>> + * Free the suballocation. The suballocation can be re-used after
>>> @fence signals.
>>> + */
>>> +void drm_suballoc_free(struct drm_suballoc *suballoc,
>>> + struct dma_fence *fence)
>>> +{
>>> + struct drm_suballoc_manager *sa_manager;
>>> +
>>> + if (!suballoc)
>>> + return;
>>> +
>>> + sa_manager = suballoc->manager;
>>> +
>>> + spin_lock(&sa_manager->wq.lock);
>>> + if (fence && !dma_fence_is_signaled(fence)) {
>>> + u64 idx;
>>> +
>>> + suballoc->fence = dma_fence_get(fence);
>>> + idx = fence->context % DRM_SUBALLOC_MAX_QUEUES;
>>> + list_add_tail(&suballoc->flist, &sa_manager->flist[idx]);
>>> + } else {
>>> + drm_suballoc_remove_locked(suballoc);
>>> + }
>>> + wake_up_all_locked(&sa_manager->wq);
>>> + spin_unlock(&sa_manager->wq.lock);
>>> +}
>>> +EXPORT_SYMBOL(drm_suballoc_free);
>>> +
>>> +#ifdef CONFIG_DEBUG_FS
>>> +void drm_suballoc_dump_debug_info(struct drm_suballoc_manager
>>> *sa_manager,
>>> + struct drm_printer *p, u64 suballoc_base)
>>> +{
>>> + struct drm_suballoc *i;
>>> +
>>> + spin_lock(&sa_manager->wq.lock);
>>> + list_for_each_entry(i, &sa_manager->olist, olist) {
>>> + u64 soffset = i->soffset;
>>> + u64 eoffset = i->eoffset;
>>> +
>>> + if (&i->olist == sa_manager->hole)
>>> + drm_puts(p, ">");
>>> + else
>>> + drm_puts(p, " ");
>>> +
>>> + drm_printf(p, "[0x%010llx 0x%010llx] size %8lld",
>>> + suballoc_base + soffset, suballoc_base + eoffset,
>>> + eoffset - soffset);
>>> +
>>> + if (i->fence)
>>> + drm_printf(p, " protected by 0x%016llx on context %llu",
>>> + i->fence->seqno, i->fence->context);
>>> +
>>> + drm_puts(p, "\n");
>>> + }
>>> + spin_unlock(&sa_manager->wq.lock);
>>> +}
>>> +EXPORT_SYMBOL(drm_suballoc_dump_debug_info);
>>> +#endif
>>> +MODULE_AUTHOR("Multiple");
>>> +MODULE_DESCRIPTION("Range suballocator helper");
>>> +MODULE_LICENSE("Dual MIT/GPL");
>>> diff --git a/include/drm/drm_suballoc.h b/include/drm/drm_suballoc.h
>>> new file mode 100644
>>> index 000000000000..a737f996e5ff
>>> --- /dev/null
>>> +++ b/include/drm/drm_suballoc.h
>>> @@ -0,0 +1,106 @@
>>> +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
>>> +/*
>>> + * Copyright 2011 Red Hat Inc.
>>> + * Copyright © 2022 Intel Corporation
>>> + */
>>> +#ifndef _DRM_SUBALLOC_H_
>>> +#define _DRM_SUBALLOC_H_
>>> +
>>> +#include <drm/drm_mm.h>
>>> +
>>> +#include <linux/dma-fence.h>
>>> +#include <linux/types.h>
>>> +
>>> +#define DRM_SUBALLOC_MAX_QUEUES 32
>>> +/**
>>> + * struct drm_suballoc_manager - fenced range allocations
>>> + * @wq: Wait queue for sleeping allocations on contention.
>>> + * @hole: Pointer to first hole node.
>>> + * @olist: List of allocated ranges.
>>> + * @flist: Array[fence context hash] of queues of fenced allocated
>>> ranges.
>>> + * @size: Size of the managed range.
>>> + * @align: Default alignment for the managed range.
>>> + */
>>> +struct drm_suballoc_manager {
>>> + wait_queue_head_t wq;
>>> + struct list_head *hole;
>>> + struct list_head olist;
>>> + struct list_head flist[DRM_SUBALLOC_MAX_QUEUES];
>>> + u64 size;
>>> + u64 align;
>>> +};
>>> +
>>> +/**
>>> + * struct drm_suballoc - Sub-allocated range
>>> + * @olist: List link for list of allocated ranges.
>>> + * @flist: List link for the manager fenced allocated ranges queues.
>>> + * @manager: The drm_suballoc_manager.
>>> + * @soffset: Start offset.
>>> + * @eoffset: End offset + 1 so that @eoffset - @soffset = size.
>>> + * @fence: The fence protecting the allocation.
>>> + */
>>> +struct drm_suballoc {
>>> + struct list_head olist;
>>> + struct list_head flist;
>>> + struct drm_suballoc_manager *manager;
>>> + u64 soffset;
>>> + u64 eoffset;
>>> + struct dma_fence *fence;
>>> +};
>>> +
>>> +void drm_suballoc_manager_init(struct drm_suballoc_manager
>>> *sa_manager,
>>> + u64 size, u64 align);
>>> +
>>> +void drm_suballoc_manager_fini(struct drm_suballoc_manager
>>> *sa_manager);
>>> +
>>> +struct drm_suballoc *
>>> +drm_suballoc_new(struct drm_suballoc_manager *sa_manager, u64 size,
>>> gfp_t gfp,
>>> + bool intr, u64 align);
>>> +
>>> +void drm_suballoc_free(struct drm_suballoc *sa, struct dma_fence
>>> *fence);
>>> +
>>> +/**
>>> + * drm_suballoc_soffset - Range start.
>>> + * @sa: The struct drm_suballoc.
>>> + *
>>> + * Return: The start of the allocated range.
>>> + */
>>> +static inline u64 drm_suballoc_soffset(struct drm_suballoc *sa)
>>> +{
>>> + return sa->soffset;
>>> +}
>>> +
>>> +/**
>>> + * drm_suballoc_eoffset - Range end.
>>> + * @sa: The struct drm_suballoc.
>>> + *
>>> + * Return: The end of the allocated range + 1.
>>> + */
>>> +static inline u64 drm_suballoc_eoffset(struct drm_suballoc *sa)
>>> +{
>>> + return sa->eoffset;
>>> +}
>>> +
>>> +/**
>>> + * drm_suballoc_size - Range size.
>>> + * @sa: The struct drm_suballoc.
>>> + *
>>> + * Return: The size of the allocated range.
>>> + */
>>> +static inline u64 drm_suballoc_size(struct drm_suballoc *sa)
>>> +{
>>> + return sa->eoffset - sa->soffset;
>>> +}
>>> +
>>> +#ifdef CONFIG_DEBUG_FS
>>> +void drm_suballoc_dump_debug_info(struct drm_suballoc_manager
>>> *sa_manager,
>>> + struct drm_printer *p, u64 suballoc_base);
>>> +#else
>>> +static inline void
>>> +drm_suballoc_dump_debug_info(struct drm_suballoc_manager *sa_manager,
>>> + struct drm_printer *p, u64 suballoc_base)
>>> +{ }
>>> +
>>> +#endif
>>> +
>>> +#endif /* _DRM_SUBALLOC_H_ */
>>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [Intel-xe] [PATCH 2/3] drm/amd: Convert amdgpu to use suballocation helper.
2023-02-23 11:15 ` Christian König
@ 2023-02-23 14:29 ` Thomas Hellström
2023-02-23 16:22 ` Christian König
0 siblings, 1 reply; 12+ messages in thread
From: Thomas Hellström @ 2023-02-23 14:29 UTC (permalink / raw)
To: Christian König, dri-devel
Cc: Daniel Vetter, Maarten Lankhorst, intel-xe, Dave Airlie
On 2/23/23 12:15, Christian König wrote:
> Am 23.02.23 um 11:57 schrieb Thomas Hellström:
>> From: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
>>
>> Now that we have a generic suballocation helper, Use it in amdgpu.
>> For lines that get moved or changed, also fix up pre-existing style
>> issues.
>>
>> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
>> Co-developed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
>> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
>> ---
>> drivers/gpu/drm/Kconfig | 1 +
>> drivers/gpu/drm/amd/amdgpu/Kconfig | 1 +
>> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 26 +-
>> drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 5 +-
>> drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 23 +-
>> drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 3 +-
>> drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c | 324 ++-------------------
>> 7 files changed, 46 insertions(+), 337 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
>> index 8fbe57407c60..73ddfdf3a894 100644
>> --- a/drivers/gpu/drm/Kconfig
>> +++ b/drivers/gpu/drm/Kconfig
>> @@ -77,6 +77,7 @@ config DRM_KUNIT_TEST
>> select DRM_DISPLAY_HELPER
>> select DRM_LIB_RANDOM
>> select DRM_KMS_HELPER
>> + select DRM_SUBALLOC_HELPER
>> select DRM_BUDDY
>> select DRM_EXPORT_FOR_TESTS if m
>> select DRM_KUNIT_TEST_HELPERS
>
> This looks like it's misplaced, apart from that the patch looks good
> to me.
Looks like a TAB vs spaces issue. The resulting file looks correct. Also
added the same select for Radeon in the following patch which was forgotten.
Added your R-B to all patches, even if it wasn't explicit for this one.
Please let me know if I misunderstood that one.
Thanks,
Thomas
>
> Regards,
> Christian.
>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig
>> b/drivers/gpu/drm/amd/amdgpu/Kconfig
>> index 5341b6b242c3..0ed12171450b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/Kconfig
>> +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
>> @@ -18,6 +18,7 @@ config DRM_AMDGPU
>> select BACKLIGHT_CLASS_DEVICE
>> select INTERVAL_TREE
>> select DRM_BUDDY
>> + select DRM_SUBALLOC_HELPER
>> # amdgpu depends on ACPI_VIDEO when ACPI is enabled, for select
>> to work
>> # ACPI_VIDEO's dependencies must also be selected.
>> select INPUT if ACPI
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index 164141bc8b4a..dda88090f044 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -424,29 +424,11 @@ struct amdgpu_clock {
>> * alignment).
>> */
>> -#define AMDGPU_SA_NUM_FENCE_LISTS 32
>> -
>> struct amdgpu_sa_manager {
>> - wait_queue_head_t wq;
>> - struct amdgpu_bo *bo;
>> - struct list_head *hole;
>> - struct list_head flist[AMDGPU_SA_NUM_FENCE_LISTS];
>> - struct list_head olist;
>> - unsigned size;
>> - uint64_t gpu_addr;
>> - void *cpu_ptr;
>> - uint32_t domain;
>> - uint32_t align;
>> -};
>> -
>> -/* sub-allocation buffer */
>> -struct amdgpu_sa_bo {
>> - struct list_head olist;
>> - struct list_head flist;
>> - struct amdgpu_sa_manager *manager;
>> - unsigned soffset;
>> - unsigned eoffset;
>> - struct dma_fence *fence;
>> + struct drm_suballoc_manager base;
>> + struct amdgpu_bo *bo;
>> + uint64_t gpu_addr;
>> + void *cpu_ptr;
>> };
>> int amdgpu_fence_slab_init(void);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>> index bcccc348dbe2..df7eb0b7c4b9 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>> @@ -69,7 +69,7 @@ int amdgpu_ib_get(struct amdgpu_device *adev,
>> struct amdgpu_vm *vm,
>> if (size) {
>> r = amdgpu_sa_bo_new(&adev->ib_pools[pool_type],
>> - &ib->sa_bo, size, 256);
>> + &ib->sa_bo, size);
>> if (r) {
>> dev_err(adev->dev, "failed to get a new IB (%d)\n", r);
>> return r;
>> @@ -309,8 +309,7 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev)
>> for (i = 0; i < AMDGPU_IB_POOL_MAX; i++) {
>> r = amdgpu_sa_bo_manager_init(adev, &adev->ib_pools[i],
>> - AMDGPU_IB_POOL_SIZE,
>> - AMDGPU_GPU_PAGE_SIZE,
>> + AMDGPU_IB_POOL_SIZE, 256,
>> AMDGPU_GEM_DOMAIN_GTT);
>> if (r)
>> goto error;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>> index 93207badf83f..5a85726ce853 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>> @@ -336,15 +336,22 @@ uint32_t amdgpu_bo_get_preferred_domain(struct
>> amdgpu_device *adev,
>> /*
>> * sub allocation
>> */
>> +static inline struct amdgpu_sa_manager *
>> +to_amdgpu_sa_manager(struct drm_suballoc_manager *manager)
>> +{
>> + return container_of(manager, struct amdgpu_sa_manager, base);
>> +}
>> -static inline uint64_t amdgpu_sa_bo_gpu_addr(struct amdgpu_sa_bo
>> *sa_bo)
>> +static inline uint64_t amdgpu_sa_bo_gpu_addr(struct drm_suballoc
>> *sa_bo)
>> {
>> - return sa_bo->manager->gpu_addr + sa_bo->soffset;
>> + return to_amdgpu_sa_manager(sa_bo->manager)->gpu_addr +
>> + drm_suballoc_soffset(sa_bo);
>> }
>> -static inline void * amdgpu_sa_bo_cpu_addr(struct amdgpu_sa_bo
>> *sa_bo)
>> +static inline void *amdgpu_sa_bo_cpu_addr(struct drm_suballoc *sa_bo)
>> {
>> - return sa_bo->manager->cpu_ptr + sa_bo->soffset;
>> + return to_amdgpu_sa_manager(sa_bo->manager)->cpu_ptr +
>> + drm_suballoc_soffset(sa_bo);
>> }
>> int amdgpu_sa_bo_manager_init(struct amdgpu_device *adev,
>> @@ -355,11 +362,11 @@ void amdgpu_sa_bo_manager_fini(struct
>> amdgpu_device *adev,
>> int amdgpu_sa_bo_manager_start(struct amdgpu_device *adev,
>> struct amdgpu_sa_manager *sa_manager);
>> int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
>> - struct amdgpu_sa_bo **sa_bo,
>> - unsigned size, unsigned align);
>> + struct drm_suballoc **sa_bo,
>> + unsigned int size);
>> void amdgpu_sa_bo_free(struct amdgpu_device *adev,
>> - struct amdgpu_sa_bo **sa_bo,
>> - struct dma_fence *fence);
>> + struct drm_suballoc **sa_bo,
>> + struct dma_fence *fence);
>> #if defined(CONFIG_DEBUG_FS)
>> void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager
>> *sa_manager,
>> struct seq_file *m);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>> index 3989e755a5b4..018f36b10de8 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>> @@ -27,6 +27,7 @@
>> #include <drm/amdgpu_drm.h>
>> #include <drm/gpu_scheduler.h>
>> #include <drm/drm_print.h>
>> +#include <drm/drm_suballoc.h>
>> struct amdgpu_device;
>> struct amdgpu_ring;
>> @@ -92,7 +93,7 @@ enum amdgpu_ib_pool_type {
>> };
>> struct amdgpu_ib {
>> - struct amdgpu_sa_bo *sa_bo;
>> + struct drm_suballoc *sa_bo;
>> uint32_t length_dw;
>> uint64_t gpu_addr;
>> uint32_t *ptr;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
>> index 524d10b21041..c6b4337eb20c 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
>> @@ -44,327 +44,63 @@
>> #include "amdgpu.h"
>> -static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo);
>> -static void amdgpu_sa_bo_try_free(struct amdgpu_sa_manager
>> *sa_manager);
>> -
>> int amdgpu_sa_bo_manager_init(struct amdgpu_device *adev,
>> struct amdgpu_sa_manager *sa_manager,
>> - unsigned size, u32 align, u32 domain)
>> + unsigned int size, u32 suballoc_align, u32 domain)
>> {
>> - int i, r;
>> -
>> - init_waitqueue_head(&sa_manager->wq);
>> - sa_manager->bo = NULL;
>> - sa_manager->size = size;
>> - sa_manager->domain = domain;
>> - sa_manager->align = align;
>> - sa_manager->hole = &sa_manager->olist;
>> - INIT_LIST_HEAD(&sa_manager->olist);
>> - for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
>> - INIT_LIST_HEAD(&sa_manager->flist[i]);
>> + int r;
>> - r = amdgpu_bo_create_kernel(adev, size, align, domain,
>> &sa_manager->bo,
>> - &sa_manager->gpu_addr, &sa_manager->cpu_ptr);
>> + r = amdgpu_bo_create_kernel(adev, size, AMDGPU_GPU_PAGE_SIZE,
>> domain,
>> + &sa_manager->bo, &sa_manager->gpu_addr,
>> + &sa_manager->cpu_ptr);
>> if (r) {
>> dev_err(adev->dev, "(%d) failed to allocate bo for
>> manager\n", r);
>> return r;
>> }
>> - memset(sa_manager->cpu_ptr, 0, sa_manager->size);
>> + memset(sa_manager->cpu_ptr, 0, size);
>> + drm_suballoc_manager_init(&sa_manager->base, size, suballoc_align);
>> return r;
>> }
>> void amdgpu_sa_bo_manager_fini(struct amdgpu_device *adev,
>> struct amdgpu_sa_manager *sa_manager)
>> {
>> - struct amdgpu_sa_bo *sa_bo, *tmp;
>> -
>> if (sa_manager->bo == NULL) {
>> dev_err(adev->dev, "no bo for sa manager\n");
>> return;
>> }
>> - if (!list_empty(&sa_manager->olist)) {
>> - sa_manager->hole = &sa_manager->olist,
>> - amdgpu_sa_bo_try_free(sa_manager);
>> - if (!list_empty(&sa_manager->olist)) {
>> - dev_err(adev->dev, "sa_manager is not empty, clearing
>> anyway\n");
>> - }
>> - }
>> - list_for_each_entry_safe(sa_bo, tmp, &sa_manager->olist, olist) {
>> - amdgpu_sa_bo_remove_locked(sa_bo);
>> - }
>> + drm_suballoc_manager_fini(&sa_manager->base);
>> amdgpu_bo_free_kernel(&sa_manager->bo, &sa_manager->gpu_addr,
>> &sa_manager->cpu_ptr);
>> - sa_manager->size = 0;
>> }
>> -static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo)
>> -{
>> - struct amdgpu_sa_manager *sa_manager = sa_bo->manager;
>> - if (sa_manager->hole == &sa_bo->olist) {
>> - sa_manager->hole = sa_bo->olist.prev;
>> - }
>> - list_del_init(&sa_bo->olist);
>> - list_del_init(&sa_bo->flist);
>> - dma_fence_put(sa_bo->fence);
>> - kfree(sa_bo);
>> -}
>> -
>> -static void amdgpu_sa_bo_try_free(struct amdgpu_sa_manager *sa_manager)
>> +int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
>> + struct drm_suballoc **sa_bo,
>> + unsigned int size)
>> {
>> - struct amdgpu_sa_bo *sa_bo, *tmp;
>> + struct drm_suballoc *sa = drm_suballoc_new(&sa_manager->base, size,
>> + GFP_KERNEL, true, 0);
>> - if (sa_manager->hole->next == &sa_manager->olist)
>> - return;
>> + if (IS_ERR(sa)) {
>> + *sa_bo = NULL;
>> - sa_bo = list_entry(sa_manager->hole->next, struct
>> amdgpu_sa_bo, olist);
>> - list_for_each_entry_safe_from(sa_bo, tmp, &sa_manager->olist,
>> olist) {
>> - if (sa_bo->fence == NULL ||
>> - !dma_fence_is_signaled(sa_bo->fence)) {
>> - return;
>> - }
>> - amdgpu_sa_bo_remove_locked(sa_bo);
>> + return PTR_ERR(sa);
>> }
>> -}
>> -static inline unsigned amdgpu_sa_bo_hole_soffset(struct
>> amdgpu_sa_manager *sa_manager)
>> -{
>> - struct list_head *hole = sa_manager->hole;
>> -
>> - if (hole != &sa_manager->olist) {
>> - return list_entry(hole, struct amdgpu_sa_bo, olist)->eoffset;
>> - }
>> + *sa_bo = sa;
>> return 0;
>> }
>> -static inline unsigned amdgpu_sa_bo_hole_eoffset(struct
>> amdgpu_sa_manager *sa_manager)
>> -{
>> - struct list_head *hole = sa_manager->hole;
>> -
>> - if (hole->next != &sa_manager->olist) {
>> - return list_entry(hole->next, struct amdgpu_sa_bo,
>> olist)->soffset;
>> - }
>> - return sa_manager->size;
>> -}
>> -
>> -static bool amdgpu_sa_bo_try_alloc(struct amdgpu_sa_manager
>> *sa_manager,
>> - struct amdgpu_sa_bo *sa_bo,
>> - unsigned size, unsigned align)
>> -{
>> - unsigned soffset, eoffset, wasted;
>> -
>> - soffset = amdgpu_sa_bo_hole_soffset(sa_manager);
>> - eoffset = amdgpu_sa_bo_hole_eoffset(sa_manager);
>> - wasted = (align - (soffset % align)) % align;
>> -
>> - if ((eoffset - soffset) >= (size + wasted)) {
>> - soffset += wasted;
>> -
>> - sa_bo->manager = sa_manager;
>> - sa_bo->soffset = soffset;
>> - sa_bo->eoffset = soffset + size;
>> - list_add(&sa_bo->olist, sa_manager->hole);
>> - INIT_LIST_HEAD(&sa_bo->flist);
>> - sa_manager->hole = &sa_bo->olist;
>> - return true;
>> - }
>> - return false;
>> -}
>> -
>> -/**
>> - * amdgpu_sa_event - Check if we can stop waiting
>> - *
>> - * @sa_manager: pointer to the sa_manager
>> - * @size: number of bytes we want to allocate
>> - * @align: alignment we need to match
>> - *
>> - * Check if either there is a fence we can wait for or
>> - * enough free memory to satisfy the allocation directly
>> - */
>> -static bool amdgpu_sa_event(struct amdgpu_sa_manager *sa_manager,
>> - unsigned size, unsigned align)
>> -{
>> - unsigned soffset, eoffset, wasted;
>> - int i;
>> -
>> - for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
>> - if (!list_empty(&sa_manager->flist[i]))
>> - return true;
>> -
>> - soffset = amdgpu_sa_bo_hole_soffset(sa_manager);
>> - eoffset = amdgpu_sa_bo_hole_eoffset(sa_manager);
>> - wasted = (align - (soffset % align)) % align;
>> -
>> - if ((eoffset - soffset) >= (size + wasted)) {
>> - return true;
>> - }
>> -
>> - return false;
>> -}
>> -
>> -static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager
>> *sa_manager,
>> - struct dma_fence **fences,
>> - unsigned *tries)
>> -{
>> - struct amdgpu_sa_bo *best_bo = NULL;
>> - unsigned i, soffset, best, tmp;
>> -
>> - /* if hole points to the end of the buffer */
>> - if (sa_manager->hole->next == &sa_manager->olist) {
>> - /* try again with its beginning */
>> - sa_manager->hole = &sa_manager->olist;
>> - return true;
>> - }
>> -
>> - soffset = amdgpu_sa_bo_hole_soffset(sa_manager);
>> - /* to handle wrap around we add sa_manager->size */
>> - best = sa_manager->size * 2;
>> - /* go over all fence list and try to find the closest sa_bo
>> - * of the current last
>> - */
>> - for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i) {
>> - struct amdgpu_sa_bo *sa_bo;
>> -
>> - fences[i] = NULL;
>> -
>> - if (list_empty(&sa_manager->flist[i]))
>> - continue;
>> -
>> - sa_bo = list_first_entry(&sa_manager->flist[i],
>> - struct amdgpu_sa_bo, flist);
>> -
>> - if (!dma_fence_is_signaled(sa_bo->fence)) {
>> - fences[i] = sa_bo->fence;
>> - continue;
>> - }
>> -
>> - /* limit the number of tries each ring gets */
>> - if (tries[i] > 2) {
>> - continue;
>> - }
>> -
>> - tmp = sa_bo->soffset;
>> - if (tmp < soffset) {
>> - /* wrap around, pretend it's after */
>> - tmp += sa_manager->size;
>> - }
>> - tmp -= soffset;
>> - if (tmp < best) {
>> - /* this sa bo is the closest one */
>> - best = tmp;
>> - best_bo = sa_bo;
>> - }
>> - }
>> -
>> - if (best_bo) {
>> - uint32_t idx = best_bo->fence->context;
>> -
>> - idx %= AMDGPU_SA_NUM_FENCE_LISTS;
>> - ++tries[idx];
>> - sa_manager->hole = best_bo->olist.prev;
>> -
>> - /* we knew that this one is signaled,
>> - so it's save to remote it */
>> - amdgpu_sa_bo_remove_locked(best_bo);
>> - return true;
>> - }
>> - return false;
>> -}
>> -
>> -int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
>> - struct amdgpu_sa_bo **sa_bo,
>> - unsigned size, unsigned align)
>> -{
>> - struct dma_fence *fences[AMDGPU_SA_NUM_FENCE_LISTS];
>> - unsigned tries[AMDGPU_SA_NUM_FENCE_LISTS];
>> - unsigned count;
>> - int i, r;
>> - signed long t;
>> -
>> - if (WARN_ON_ONCE(align > sa_manager->align))
>> - return -EINVAL;
>> -
>> - if (WARN_ON_ONCE(size > sa_manager->size))
>> - return -EINVAL;
>> -
>> - *sa_bo = kmalloc(sizeof(struct amdgpu_sa_bo), GFP_KERNEL);
>> - if (!(*sa_bo))
>> - return -ENOMEM;
>> - (*sa_bo)->manager = sa_manager;
>> - (*sa_bo)->fence = NULL;
>> - INIT_LIST_HEAD(&(*sa_bo)->olist);
>> - INIT_LIST_HEAD(&(*sa_bo)->flist);
>> -
>> - spin_lock(&sa_manager->wq.lock);
>> - do {
>> - for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
>> - tries[i] = 0;
>> -
>> - do {
>> - amdgpu_sa_bo_try_free(sa_manager);
>> -
>> - if (amdgpu_sa_bo_try_alloc(sa_manager, *sa_bo,
>> - size, align)) {
>> - spin_unlock(&sa_manager->wq.lock);
>> - return 0;
>> - }
>> -
>> - /* see if we can skip over some allocations */
>> - } while (amdgpu_sa_bo_next_hole(sa_manager, fences, tries));
>> -
>> - for (i = 0, count = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
>> - if (fences[i])
>> - fences[count++] = dma_fence_get(fences[i]);
>> -
>> - if (count) {
>> - spin_unlock(&sa_manager->wq.lock);
>> - t = dma_fence_wait_any_timeout(fences, count, false,
>> - MAX_SCHEDULE_TIMEOUT,
>> - NULL);
>> - for (i = 0; i < count; ++i)
>> - dma_fence_put(fences[i]);
>> -
>> - r = (t > 0) ? 0 : t;
>> - spin_lock(&sa_manager->wq.lock);
>> - } else {
>> - /* if we have nothing to wait for block */
>> - r = wait_event_interruptible_locked(
>> - sa_manager->wq,
>> - amdgpu_sa_event(sa_manager, size, align)
>> - );
>> - }
>> -
>> - } while (!r);
>> -
>> - spin_unlock(&sa_manager->wq.lock);
>> - kfree(*sa_bo);
>> - *sa_bo = NULL;
>> - return r;
>> -}
>> -
>> -void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct
>> amdgpu_sa_bo **sa_bo,
>> +void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct
>> drm_suballoc **sa_bo,
>> struct dma_fence *fence)
>> {
>> - struct amdgpu_sa_manager *sa_manager;
>> -
>> if (sa_bo == NULL || *sa_bo == NULL) {
>> return;
>> }
>> - sa_manager = (*sa_bo)->manager;
>> - spin_lock(&sa_manager->wq.lock);
>> - if (fence && !dma_fence_is_signaled(fence)) {
>> - uint32_t idx;
>> -
>> - (*sa_bo)->fence = dma_fence_get(fence);
>> - idx = fence->context % AMDGPU_SA_NUM_FENCE_LISTS;
>> - list_add_tail(&(*sa_bo)->flist, &sa_manager->flist[idx]);
>> - } else {
>> - amdgpu_sa_bo_remove_locked(*sa_bo);
>> - }
>> - wake_up_all_locked(&sa_manager->wq);
>> - spin_unlock(&sa_manager->wq.lock);
>> + drm_suballoc_free(*sa_bo, fence);
>> *sa_bo = NULL;
>> }
>> @@ -373,26 +109,8 @@ void amdgpu_sa_bo_free(struct amdgpu_device
>> *adev, struct amdgpu_sa_bo **sa_bo,
>> void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager
>> *sa_manager,
>> struct seq_file *m)
>> {
>> - struct amdgpu_sa_bo *i;
>> -
>> - spin_lock(&sa_manager->wq.lock);
>> - list_for_each_entry(i, &sa_manager->olist, olist) {
>> - uint64_t soffset = i->soffset + sa_manager->gpu_addr;
>> - uint64_t eoffset = i->eoffset + sa_manager->gpu_addr;
>> - if (&i->olist == sa_manager->hole) {
>> - seq_printf(m, ">");
>> - } else {
>> - seq_printf(m, " ");
>> - }
>> - seq_printf(m, "[0x%010llx 0x%010llx] size %8lld",
>> - soffset, eoffset, eoffset - soffset);
>> + struct drm_printer p = drm_seq_file_printer(m);
>> - if (i->fence)
>> - seq_printf(m, " protected by 0x%016llx on context %llu",
>> - i->fence->seqno, i->fence->context);
>> -
>> - seq_printf(m, "\n");
>> - }
>> - spin_unlock(&sa_manager->wq.lock);
>> + drm_suballoc_dump_debug_info(&sa_manager->base, &p,
>> sa_manager->gpu_addr);
>> }
>> #endif
>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [Intel-xe] [PATCH 2/3] drm/amd: Convert amdgpu to use suballocation helper.
2023-02-23 14:29 ` Thomas Hellström
@ 2023-02-23 16:22 ` Christian König
0 siblings, 0 replies; 12+ messages in thread
From: Christian König @ 2023-02-23 16:22 UTC (permalink / raw)
To: Thomas Hellström, dri-devel
Cc: Daniel Vetter, Maarten Lankhorst, intel-xe, Dave Airlie
Am 23.02.23 um 15:29 schrieb Thomas Hellström:
>
> On 2/23/23 12:15, Christian König wrote:
>> Am 23.02.23 um 11:57 schrieb Thomas Hellström:
>>> From: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
>>>
>>> Now that we have a generic suballocation helper, Use it in amdgpu.
>>> For lines that get moved or changed, also fix up pre-existing style
>>> issues.
>>>
>>> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
>>> Co-developed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
>>> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
>>> ---
>>> drivers/gpu/drm/Kconfig | 1 +
>>> drivers/gpu/drm/amd/amdgpu/Kconfig | 1 +
>>> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 26 +-
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 5 +-
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 23 +-
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 3 +-
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c | 324
>>> ++-------------------
>>> 7 files changed, 46 insertions(+), 337 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
>>> index 8fbe57407c60..73ddfdf3a894 100644
>>> --- a/drivers/gpu/drm/Kconfig
>>> +++ b/drivers/gpu/drm/Kconfig
>>> @@ -77,6 +77,7 @@ config DRM_KUNIT_TEST
>>> select DRM_DISPLAY_HELPER
>>> select DRM_LIB_RANDOM
>>> select DRM_KMS_HELPER
>>> + select DRM_SUBALLOC_HELPER
>>> select DRM_BUDDY
>>> select DRM_EXPORT_FOR_TESTS if m
>>> select DRM_KUNIT_TEST_HELPERS
>>
>> This looks like it's misplaced, apart from that the patch looks good
>> to me.
>
> Looks like a TAB vs spaces issue. The resulting file looks correct.
> Also added the same select for Radeon in the following patch which was
> forgotten.
That wasn't what I meant. This is the patch that changes amdgpu, but
you are adding the dependency to the KUNIT test.
It looks like this line should be added in patch #1, not patch #2.
Regards,
Christian.
>
> Added your R-B to all patches, even if it wasn't explicit for this one.
> Please let me know if I misunderstood that one.
>
> Thanks,
>
> Thomas
>
>
>>
>> Regards,
>> Christian.
>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig
>>> b/drivers/gpu/drm/amd/amdgpu/Kconfig
>>> index 5341b6b242c3..0ed12171450b 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/Kconfig
>>> +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
>>> @@ -18,6 +18,7 @@ config DRM_AMDGPU
>>> select BACKLIGHT_CLASS_DEVICE
>>> select INTERVAL_TREE
>>> select DRM_BUDDY
>>> + select DRM_SUBALLOC_HELPER
>>> # amdgpu depends on ACPI_VIDEO when ACPI is enabled, for
>>> select to work
>>> # ACPI_VIDEO's dependencies must also be selected.
>>> select INPUT if ACPI
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> index 164141bc8b4a..dda88090f044 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> @@ -424,29 +424,11 @@ struct amdgpu_clock {
>>> * alignment).
>>> */
>>> -#define AMDGPU_SA_NUM_FENCE_LISTS 32
>>> -
>>> struct amdgpu_sa_manager {
>>> - wait_queue_head_t wq;
>>> - struct amdgpu_bo *bo;
>>> - struct list_head *hole;
>>> - struct list_head flist[AMDGPU_SA_NUM_FENCE_LISTS];
>>> - struct list_head olist;
>>> - unsigned size;
>>> - uint64_t gpu_addr;
>>> - void *cpu_ptr;
>>> - uint32_t domain;
>>> - uint32_t align;
>>> -};
>>> -
>>> -/* sub-allocation buffer */
>>> -struct amdgpu_sa_bo {
>>> - struct list_head olist;
>>> - struct list_head flist;
>>> - struct amdgpu_sa_manager *manager;
>>> - unsigned soffset;
>>> - unsigned eoffset;
>>> - struct dma_fence *fence;
>>> + struct drm_suballoc_manager base;
>>> + struct amdgpu_bo *bo;
>>> + uint64_t gpu_addr;
>>> + void *cpu_ptr;
>>> };
>>> int amdgpu_fence_slab_init(void);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>>> index bcccc348dbe2..df7eb0b7c4b9 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>>> @@ -69,7 +69,7 @@ int amdgpu_ib_get(struct amdgpu_device *adev,
>>> struct amdgpu_vm *vm,
>>> if (size) {
>>> r = amdgpu_sa_bo_new(&adev->ib_pools[pool_type],
>>> - &ib->sa_bo, size, 256);
>>> + &ib->sa_bo, size);
>>> if (r) {
>>> dev_err(adev->dev, "failed to get a new IB (%d)\n", r);
>>> return r;
>>> @@ -309,8 +309,7 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev)
>>> for (i = 0; i < AMDGPU_IB_POOL_MAX; i++) {
>>> r = amdgpu_sa_bo_manager_init(adev, &adev->ib_pools[i],
>>> - AMDGPU_IB_POOL_SIZE,
>>> - AMDGPU_GPU_PAGE_SIZE,
>>> + AMDGPU_IB_POOL_SIZE, 256,
>>> AMDGPU_GEM_DOMAIN_GTT);
>>> if (r)
>>> goto error;
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>> index 93207badf83f..5a85726ce853 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>> @@ -336,15 +336,22 @@ uint32_t amdgpu_bo_get_preferred_domain(struct
>>> amdgpu_device *adev,
>>> /*
>>> * sub allocation
>>> */
>>> +static inline struct amdgpu_sa_manager *
>>> +to_amdgpu_sa_manager(struct drm_suballoc_manager *manager)
>>> +{
>>> + return container_of(manager, struct amdgpu_sa_manager, base);
>>> +}
>>> -static inline uint64_t amdgpu_sa_bo_gpu_addr(struct amdgpu_sa_bo
>>> *sa_bo)
>>> +static inline uint64_t amdgpu_sa_bo_gpu_addr(struct drm_suballoc
>>> *sa_bo)
>>> {
>>> - return sa_bo->manager->gpu_addr + sa_bo->soffset;
>>> + return to_amdgpu_sa_manager(sa_bo->manager)->gpu_addr +
>>> + drm_suballoc_soffset(sa_bo);
>>> }
>>> -static inline void * amdgpu_sa_bo_cpu_addr(struct amdgpu_sa_bo
>>> *sa_bo)
>>> +static inline void *amdgpu_sa_bo_cpu_addr(struct drm_suballoc *sa_bo)
>>> {
>>> - return sa_bo->manager->cpu_ptr + sa_bo->soffset;
>>> + return to_amdgpu_sa_manager(sa_bo->manager)->cpu_ptr +
>>> + drm_suballoc_soffset(sa_bo);
>>> }
>>> int amdgpu_sa_bo_manager_init(struct amdgpu_device *adev,
>>> @@ -355,11 +362,11 @@ void amdgpu_sa_bo_manager_fini(struct
>>> amdgpu_device *adev,
>>> int amdgpu_sa_bo_manager_start(struct amdgpu_device *adev,
>>> struct amdgpu_sa_manager *sa_manager);
>>> int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
>>> - struct amdgpu_sa_bo **sa_bo,
>>> - unsigned size, unsigned align);
>>> + struct drm_suballoc **sa_bo,
>>> + unsigned int size);
>>> void amdgpu_sa_bo_free(struct amdgpu_device *adev,
>>> - struct amdgpu_sa_bo **sa_bo,
>>> - struct dma_fence *fence);
>>> + struct drm_suballoc **sa_bo,
>>> + struct dma_fence *fence);
>>> #if defined(CONFIG_DEBUG_FS)
>>> void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager
>>> *sa_manager,
>>> struct seq_file *m);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>> index 3989e755a5b4..018f36b10de8 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>> @@ -27,6 +27,7 @@
>>> #include <drm/amdgpu_drm.h>
>>> #include <drm/gpu_scheduler.h>
>>> #include <drm/drm_print.h>
>>> +#include <drm/drm_suballoc.h>
>>> struct amdgpu_device;
>>> struct amdgpu_ring;
>>> @@ -92,7 +93,7 @@ enum amdgpu_ib_pool_type {
>>> };
>>> struct amdgpu_ib {
>>> - struct amdgpu_sa_bo *sa_bo;
>>> + struct drm_suballoc *sa_bo;
>>> uint32_t length_dw;
>>> uint64_t gpu_addr;
>>> uint32_t *ptr;
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
>>> index 524d10b21041..c6b4337eb20c 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
>>> @@ -44,327 +44,63 @@
>>> #include "amdgpu.h"
>>> -static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo);
>>> -static void amdgpu_sa_bo_try_free(struct amdgpu_sa_manager
>>> *sa_manager);
>>> -
>>> int amdgpu_sa_bo_manager_init(struct amdgpu_device *adev,
>>> struct amdgpu_sa_manager *sa_manager,
>>> - unsigned size, u32 align, u32 domain)
>>> + unsigned int size, u32 suballoc_align, u32 domain)
>>> {
>>> - int i, r;
>>> -
>>> - init_waitqueue_head(&sa_manager->wq);
>>> - sa_manager->bo = NULL;
>>> - sa_manager->size = size;
>>> - sa_manager->domain = domain;
>>> - sa_manager->align = align;
>>> - sa_manager->hole = &sa_manager->olist;
>>> - INIT_LIST_HEAD(&sa_manager->olist);
>>> - for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
>>> - INIT_LIST_HEAD(&sa_manager->flist[i]);
>>> + int r;
>>> - r = amdgpu_bo_create_kernel(adev, size, align, domain,
>>> &sa_manager->bo,
>>> - &sa_manager->gpu_addr, &sa_manager->cpu_ptr);
>>> + r = amdgpu_bo_create_kernel(adev, size, AMDGPU_GPU_PAGE_SIZE,
>>> domain,
>>> + &sa_manager->bo, &sa_manager->gpu_addr,
>>> + &sa_manager->cpu_ptr);
>>> if (r) {
>>> dev_err(adev->dev, "(%d) failed to allocate bo for
>>> manager\n", r);
>>> return r;
>>> }
>>> - memset(sa_manager->cpu_ptr, 0, sa_manager->size);
>>> + memset(sa_manager->cpu_ptr, 0, size);
>>> + drm_suballoc_manager_init(&sa_manager->base, size,
>>> suballoc_align);
>>> return r;
>>> }
>>> void amdgpu_sa_bo_manager_fini(struct amdgpu_device *adev,
>>> struct amdgpu_sa_manager *sa_manager)
>>> {
>>> - struct amdgpu_sa_bo *sa_bo, *tmp;
>>> -
>>> if (sa_manager->bo == NULL) {
>>> dev_err(adev->dev, "no bo for sa manager\n");
>>> return;
>>> }
>>> - if (!list_empty(&sa_manager->olist)) {
>>> - sa_manager->hole = &sa_manager->olist,
>>> - amdgpu_sa_bo_try_free(sa_manager);
>>> - if (!list_empty(&sa_manager->olist)) {
>>> - dev_err(adev->dev, "sa_manager is not empty, clearing
>>> anyway\n");
>>> - }
>>> - }
>>> - list_for_each_entry_safe(sa_bo, tmp, &sa_manager->olist, olist) {
>>> - amdgpu_sa_bo_remove_locked(sa_bo);
>>> - }
>>> + drm_suballoc_manager_fini(&sa_manager->base);
>>> amdgpu_bo_free_kernel(&sa_manager->bo,
>>> &sa_manager->gpu_addr, &sa_manager->cpu_ptr);
>>> - sa_manager->size = 0;
>>> }
>>> -static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo)
>>> -{
>>> - struct amdgpu_sa_manager *sa_manager = sa_bo->manager;
>>> - if (sa_manager->hole == &sa_bo->olist) {
>>> - sa_manager->hole = sa_bo->olist.prev;
>>> - }
>>> - list_del_init(&sa_bo->olist);
>>> - list_del_init(&sa_bo->flist);
>>> - dma_fence_put(sa_bo->fence);
>>> - kfree(sa_bo);
>>> -}
>>> -
>>> -static void amdgpu_sa_bo_try_free(struct amdgpu_sa_manager
>>> *sa_manager)
>>> +int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
>>> + struct drm_suballoc **sa_bo,
>>> + unsigned int size)
>>> {
>>> - struct amdgpu_sa_bo *sa_bo, *tmp;
>>> + struct drm_suballoc *sa = drm_suballoc_new(&sa_manager->base,
>>> size,
>>> + GFP_KERNEL, true, 0);
>>> - if (sa_manager->hole->next == &sa_manager->olist)
>>> - return;
>>> + if (IS_ERR(sa)) {
>>> + *sa_bo = NULL;
>>> - sa_bo = list_entry(sa_manager->hole->next, struct
>>> amdgpu_sa_bo, olist);
>>> - list_for_each_entry_safe_from(sa_bo, tmp, &sa_manager->olist,
>>> olist) {
>>> - if (sa_bo->fence == NULL ||
>>> - !dma_fence_is_signaled(sa_bo->fence)) {
>>> - return;
>>> - }
>>> - amdgpu_sa_bo_remove_locked(sa_bo);
>>> + return PTR_ERR(sa);
>>> }
>>> -}
>>> -static inline unsigned amdgpu_sa_bo_hole_soffset(struct
>>> amdgpu_sa_manager *sa_manager)
>>> -{
>>> - struct list_head *hole = sa_manager->hole;
>>> -
>>> - if (hole != &sa_manager->olist) {
>>> - return list_entry(hole, struct amdgpu_sa_bo, olist)->eoffset;
>>> - }
>>> + *sa_bo = sa;
>>> return 0;
>>> }
>>> -static inline unsigned amdgpu_sa_bo_hole_eoffset(struct
>>> amdgpu_sa_manager *sa_manager)
>>> -{
>>> - struct list_head *hole = sa_manager->hole;
>>> -
>>> - if (hole->next != &sa_manager->olist) {
>>> - return list_entry(hole->next, struct amdgpu_sa_bo,
>>> olist)->soffset;
>>> - }
>>> - return sa_manager->size;
>>> -}
>>> -
>>> -static bool amdgpu_sa_bo_try_alloc(struct amdgpu_sa_manager
>>> *sa_manager,
>>> - struct amdgpu_sa_bo *sa_bo,
>>> - unsigned size, unsigned align)
>>> -{
>>> - unsigned soffset, eoffset, wasted;
>>> -
>>> - soffset = amdgpu_sa_bo_hole_soffset(sa_manager);
>>> - eoffset = amdgpu_sa_bo_hole_eoffset(sa_manager);
>>> - wasted = (align - (soffset % align)) % align;
>>> -
>>> - if ((eoffset - soffset) >= (size + wasted)) {
>>> - soffset += wasted;
>>> -
>>> - sa_bo->manager = sa_manager;
>>> - sa_bo->soffset = soffset;
>>> - sa_bo->eoffset = soffset + size;
>>> - list_add(&sa_bo->olist, sa_manager->hole);
>>> - INIT_LIST_HEAD(&sa_bo->flist);
>>> - sa_manager->hole = &sa_bo->olist;
>>> - return true;
>>> - }
>>> - return false;
>>> -}
>>> -
>>> -/**
>>> - * amdgpu_sa_event - Check if we can stop waiting
>>> - *
>>> - * @sa_manager: pointer to the sa_manager
>>> - * @size: number of bytes we want to allocate
>>> - * @align: alignment we need to match
>>> - *
>>> - * Check if either there is a fence we can wait for or
>>> - * enough free memory to satisfy the allocation directly
>>> - */
>>> -static bool amdgpu_sa_event(struct amdgpu_sa_manager *sa_manager,
>>> - unsigned size, unsigned align)
>>> -{
>>> - unsigned soffset, eoffset, wasted;
>>> - int i;
>>> -
>>> - for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
>>> - if (!list_empty(&sa_manager->flist[i]))
>>> - return true;
>>> -
>>> - soffset = amdgpu_sa_bo_hole_soffset(sa_manager);
>>> - eoffset = amdgpu_sa_bo_hole_eoffset(sa_manager);
>>> - wasted = (align - (soffset % align)) % align;
>>> -
>>> - if ((eoffset - soffset) >= (size + wasted)) {
>>> - return true;
>>> - }
>>> -
>>> - return false;
>>> -}
>>> -
>>> -static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager
>>> *sa_manager,
>>> - struct dma_fence **fences,
>>> - unsigned *tries)
>>> -{
>>> - struct amdgpu_sa_bo *best_bo = NULL;
>>> - unsigned i, soffset, best, tmp;
>>> -
>>> - /* if hole points to the end of the buffer */
>>> - if (sa_manager->hole->next == &sa_manager->olist) {
>>> - /* try again with its beginning */
>>> - sa_manager->hole = &sa_manager->olist;
>>> - return true;
>>> - }
>>> -
>>> - soffset = amdgpu_sa_bo_hole_soffset(sa_manager);
>>> - /* to handle wrap around we add sa_manager->size */
>>> - best = sa_manager->size * 2;
>>> - /* go over all fence list and try to find the closest sa_bo
>>> - * of the current last
>>> - */
>>> - for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i) {
>>> - struct amdgpu_sa_bo *sa_bo;
>>> -
>>> - fences[i] = NULL;
>>> -
>>> - if (list_empty(&sa_manager->flist[i]))
>>> - continue;
>>> -
>>> - sa_bo = list_first_entry(&sa_manager->flist[i],
>>> - struct amdgpu_sa_bo, flist);
>>> -
>>> - if (!dma_fence_is_signaled(sa_bo->fence)) {
>>> - fences[i] = sa_bo->fence;
>>> - continue;
>>> - }
>>> -
>>> - /* limit the number of tries each ring gets */
>>> - if (tries[i] > 2) {
>>> - continue;
>>> - }
>>> -
>>> - tmp = sa_bo->soffset;
>>> - if (tmp < soffset) {
>>> - /* wrap around, pretend it's after */
>>> - tmp += sa_manager->size;
>>> - }
>>> - tmp -= soffset;
>>> - if (tmp < best) {
>>> - /* this sa bo is the closest one */
>>> - best = tmp;
>>> - best_bo = sa_bo;
>>> - }
>>> - }
>>> -
>>> - if (best_bo) {
>>> - uint32_t idx = best_bo->fence->context;
>>> -
>>> - idx %= AMDGPU_SA_NUM_FENCE_LISTS;
>>> - ++tries[idx];
>>> - sa_manager->hole = best_bo->olist.prev;
>>> -
>>> - /* we knew that this one is signaled,
>>> - so it's save to remote it */
>>> - amdgpu_sa_bo_remove_locked(best_bo);
>>> - return true;
>>> - }
>>> - return false;
>>> -}
>>> -
>>> -int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
>>> - struct amdgpu_sa_bo **sa_bo,
>>> - unsigned size, unsigned align)
>>> -{
>>> - struct dma_fence *fences[AMDGPU_SA_NUM_FENCE_LISTS];
>>> - unsigned tries[AMDGPU_SA_NUM_FENCE_LISTS];
>>> - unsigned count;
>>> - int i, r;
>>> - signed long t;
>>> -
>>> - if (WARN_ON_ONCE(align > sa_manager->align))
>>> - return -EINVAL;
>>> -
>>> - if (WARN_ON_ONCE(size > sa_manager->size))
>>> - return -EINVAL;
>>> -
>>> - *sa_bo = kmalloc(sizeof(struct amdgpu_sa_bo), GFP_KERNEL);
>>> - if (!(*sa_bo))
>>> - return -ENOMEM;
>>> - (*sa_bo)->manager = sa_manager;
>>> - (*sa_bo)->fence = NULL;
>>> - INIT_LIST_HEAD(&(*sa_bo)->olist);
>>> - INIT_LIST_HEAD(&(*sa_bo)->flist);
>>> -
>>> - spin_lock(&sa_manager->wq.lock);
>>> - do {
>>> - for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
>>> - tries[i] = 0;
>>> -
>>> - do {
>>> - amdgpu_sa_bo_try_free(sa_manager);
>>> -
>>> - if (amdgpu_sa_bo_try_alloc(sa_manager, *sa_bo,
>>> - size, align)) {
>>> - spin_unlock(&sa_manager->wq.lock);
>>> - return 0;
>>> - }
>>> -
>>> - /* see if we can skip over some allocations */
>>> - } while (amdgpu_sa_bo_next_hole(sa_manager, fences, tries));
>>> -
>>> - for (i = 0, count = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
>>> - if (fences[i])
>>> - fences[count++] = dma_fence_get(fences[i]);
>>> -
>>> - if (count) {
>>> - spin_unlock(&sa_manager->wq.lock);
>>> - t = dma_fence_wait_any_timeout(fences, count, false,
>>> - MAX_SCHEDULE_TIMEOUT,
>>> - NULL);
>>> - for (i = 0; i < count; ++i)
>>> - dma_fence_put(fences[i]);
>>> -
>>> - r = (t > 0) ? 0 : t;
>>> - spin_lock(&sa_manager->wq.lock);
>>> - } else {
>>> - /* if we have nothing to wait for block */
>>> - r = wait_event_interruptible_locked(
>>> - sa_manager->wq,
>>> - amdgpu_sa_event(sa_manager, size, align)
>>> - );
>>> - }
>>> -
>>> - } while (!r);
>>> -
>>> - spin_unlock(&sa_manager->wq.lock);
>>> - kfree(*sa_bo);
>>> - *sa_bo = NULL;
>>> - return r;
>>> -}
>>> -
>>> -void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct
>>> amdgpu_sa_bo **sa_bo,
>>> +void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct
>>> drm_suballoc **sa_bo,
>>> struct dma_fence *fence)
>>> {
>>> - struct amdgpu_sa_manager *sa_manager;
>>> -
>>> if (sa_bo == NULL || *sa_bo == NULL) {
>>> return;
>>> }
>>> - sa_manager = (*sa_bo)->manager;
>>> - spin_lock(&sa_manager->wq.lock);
>>> - if (fence && !dma_fence_is_signaled(fence)) {
>>> - uint32_t idx;
>>> -
>>> - (*sa_bo)->fence = dma_fence_get(fence);
>>> - idx = fence->context % AMDGPU_SA_NUM_FENCE_LISTS;
>>> - list_add_tail(&(*sa_bo)->flist, &sa_manager->flist[idx]);
>>> - } else {
>>> - amdgpu_sa_bo_remove_locked(*sa_bo);
>>> - }
>>> - wake_up_all_locked(&sa_manager->wq);
>>> - spin_unlock(&sa_manager->wq.lock);
>>> + drm_suballoc_free(*sa_bo, fence);
>>> *sa_bo = NULL;
>>> }
>>> @@ -373,26 +109,8 @@ void amdgpu_sa_bo_free(struct amdgpu_device
>>> *adev, struct amdgpu_sa_bo **sa_bo,
>>> void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager
>>> *sa_manager,
>>> struct seq_file *m)
>>> {
>>> - struct amdgpu_sa_bo *i;
>>> -
>>> - spin_lock(&sa_manager->wq.lock);
>>> - list_for_each_entry(i, &sa_manager->olist, olist) {
>>> - uint64_t soffset = i->soffset + sa_manager->gpu_addr;
>>> - uint64_t eoffset = i->eoffset + sa_manager->gpu_addr;
>>> - if (&i->olist == sa_manager->hole) {
>>> - seq_printf(m, ">");
>>> - } else {
>>> - seq_printf(m, " ");
>>> - }
>>> - seq_printf(m, "[0x%010llx 0x%010llx] size %8lld",
>>> - soffset, eoffset, eoffset - soffset);
>>> + struct drm_printer p = drm_seq_file_printer(m);
>>> - if (i->fence)
>>> - seq_printf(m, " protected by 0x%016llx on context %llu",
>>> - i->fence->seqno, i->fence->context);
>>> -
>>> - seq_printf(m, "\n");
>>> - }
>>> - spin_unlock(&sa_manager->wq.lock);
>>> + drm_suballoc_dump_debug_info(&sa_manager->base, &p,
>>> sa_manager->gpu_addr);
>>> }
>>> #endif
>>
^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2023-02-23 16:22 UTC | newest]
Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-02-23 10:57 [Intel-xe] [PATCH 0/3] drm/helpers: Make the suballocation manager drm generic Thomas Hellström
2023-02-23 10:57 ` [Intel-xe] [PATCH 1/3] drm/suballoc: Extract amdgpu_sa.c as generic suballocation helper Thomas Hellström
2023-02-23 11:13 ` Christian König
2023-02-23 11:22 ` Thomas Hellström
2023-02-23 11:56 ` Christian König
2023-02-23 10:57 ` [Intel-xe] [PATCH 2/3] drm/amd: Convert amdgpu to use " Thomas Hellström
2023-02-23 11:15 ` Christian König
2023-02-23 14:29 ` Thomas Hellström
2023-02-23 16:22 ` Christian König
2023-02-23 10:57 ` [Intel-xe] [PATCH 3/3] drm/radeon: Use the drm suballocation manager implementation Thomas Hellström
2023-02-23 11:18 ` Christian König
-- strict thread matches above, loose matches on Subject: below --
2023-02-16 14:48 [Intel-xe] [PATCH 0/3] drm, drm/amd, drm/radeon: Introduce a generic suballocator Thomas Hellström
2023-02-16 14:48 ` [Intel-xe] [PATCH 2/3] drm/amd: Convert amdgpu to use suballocation helper Thomas Hellström
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox