* [PATCH v2 02/20] drm/ttm: rework pipelined eviction fence handling
2025-11-13 16:05 [PATCH v2 00/20] drm/amdgpu: use all SDMA instances for TTM clears and moves Pierre-Eric Pelloux-Prayer
@ 2025-11-13 16:05 ` Pierre-Eric Pelloux-Prayer
2025-11-14 12:47 ` Christian König
2025-11-18 15:00 ` Thomas Hellström
2025-11-13 16:05 ` [PATCH v2 03/20] drm/amdgpu: remove direct_submit arg from amdgpu_copy_buffer Pierre-Eric Pelloux-Prayer
` (5 subsequent siblings)
6 siblings, 2 replies; 20+ messages in thread
From: Pierre-Eric Pelloux-Prayer @ 2025-11-13 16:05 UTC (permalink / raw)
To: Alex Deucher, Christian König, David Airlie, Simona Vetter,
Huang Rui, Matthew Auld, Matthew Brost, Maarten Lankhorst,
Maxime Ripard, Thomas Zimmermann, Sumit Semwal
Cc: Pierre-Eric Pelloux-Prayer, amd-gfx, dri-devel, linux-kernel,
linux-media, linaro-mm-sig
Until now ttm stored a single pipelined eviction fence which means
drivers had to use a single entity for these evictions.
To lift this requirement, this commit allows up to 8 entities to
be used.
Ideally a dma_resv object would have been used as a container of
the eviction fences, but the locking rules make it complex.
dma_resv all have the same ww_class, which means "Attempting to
lock more mutexes after ww_acquire_done." is an error.
One alternative considered was to introduce a 2nd ww_class for
specific resv to hold a single "transient" lock (= the resv lock
would only be held for a short period, without taking any other
locks).
The other option is to statically reserve a fence array, and
extend the existing code to deal with N fences, instead of 1.
The driver is still responsible for reserving the correct number
of fence slots.
---
v2:
- simplified code
- dropped n_fences
- name changes
---
Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 8 ++--
.../gpu/drm/ttm/tests/ttm_bo_validate_test.c | 11 +++--
drivers/gpu/drm/ttm/tests/ttm_resource_test.c | 5 +-
drivers/gpu/drm/ttm/ttm_bo.c | 47 ++++++++++---------
drivers/gpu/drm/ttm/ttm_bo_util.c | 38 ++++++++++++---
drivers/gpu/drm/ttm/ttm_resource.c | 31 +++++++-----
include/drm/ttm/ttm_resource.h | 29 ++++++++----
7 files changed, 109 insertions(+), 60 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 326476089db3..3b46a24a8c48 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -2156,7 +2156,7 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
{
struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
uint64_t size;
- int r;
+ int r, i;
if (!adev->mman.initialized || amdgpu_in_reset(adev) ||
adev->mman.buffer_funcs_enabled == enable || adev->gmc.is_app_apu)
@@ -2190,8 +2190,10 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
} else {
drm_sched_entity_destroy(&adev->mman.high_pr);
drm_sched_entity_destroy(&adev->mman.low_pr);
- dma_fence_put(man->move);
- man->move = NULL;
+ for (i = 0; i < TTM_NUM_MOVE_FENCES; i++) {
+ dma_fence_put(man->eviction_fences[i]);
+ man->eviction_fences[i] = NULL;
+ }
}
/* this just adjusts TTM size idea, which sets lpfn to the correct value */
diff --git a/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c b/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c
index 3148f5d3dbd6..8f71906c4238 100644
--- a/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c
+++ b/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c
@@ -651,7 +651,7 @@ static void ttm_bo_validate_move_fence_signaled(struct kunit *test)
int err;
man = ttm_manager_type(priv->ttm_dev, mem_type);
- man->move = dma_fence_get_stub();
+ man->eviction_fences[0] = dma_fence_get_stub();
bo = ttm_bo_kunit_init(test, test->priv, size, NULL);
bo->type = bo_type;
@@ -668,7 +668,7 @@ static void ttm_bo_validate_move_fence_signaled(struct kunit *test)
KUNIT_EXPECT_EQ(test, ctx.bytes_moved, size);
ttm_bo_put(bo);
- dma_fence_put(man->move);
+ dma_fence_put(man->eviction_fences[0]);
}
static const struct ttm_bo_validate_test_case ttm_bo_validate_wait_cases[] = {
@@ -732,9 +732,9 @@ static void ttm_bo_validate_move_fence_not_signaled(struct kunit *test)
spin_lock_init(&fence_lock);
man = ttm_manager_type(priv->ttm_dev, fst_mem);
- man->move = alloc_mock_fence(test);
+ man->eviction_fences[0] = alloc_mock_fence(test);
- task = kthread_create(threaded_fence_signal, man->move, "move-fence-signal");
+ task = kthread_create(threaded_fence_signal, man->eviction_fences[0], "move-fence-signal");
if (IS_ERR(task))
KUNIT_FAIL(test, "Couldn't create move fence signal task\n");
@@ -742,7 +742,8 @@ static void ttm_bo_validate_move_fence_not_signaled(struct kunit *test)
err = ttm_bo_validate(bo, placement_val, &ctx_val);
dma_resv_unlock(bo->base.resv);
- dma_fence_wait_timeout(man->move, false, MAX_SCHEDULE_TIMEOUT);
+ dma_fence_wait_timeout(man->eviction_fences[0], false, MAX_SCHEDULE_TIMEOUT);
+ man->eviction_fences[0] = NULL;
KUNIT_EXPECT_EQ(test, err, 0);
KUNIT_EXPECT_EQ(test, ctx_val.bytes_moved, size);
diff --git a/drivers/gpu/drm/ttm/tests/ttm_resource_test.c b/drivers/gpu/drm/ttm/tests/ttm_resource_test.c
index e6ea2bd01f07..c0e4e35e0442 100644
--- a/drivers/gpu/drm/ttm/tests/ttm_resource_test.c
+++ b/drivers/gpu/drm/ttm/tests/ttm_resource_test.c
@@ -207,6 +207,7 @@ static void ttm_resource_manager_init_basic(struct kunit *test)
struct ttm_resource_test_priv *priv = test->priv;
struct ttm_resource_manager *man;
size_t size = SZ_16K;
+ int i;
man = kunit_kzalloc(test, sizeof(*man), GFP_KERNEL);
KUNIT_ASSERT_NOT_NULL(test, man);
@@ -216,8 +217,8 @@ static void ttm_resource_manager_init_basic(struct kunit *test)
KUNIT_ASSERT_PTR_EQ(test, man->bdev, priv->devs->ttm_dev);
KUNIT_ASSERT_EQ(test, man->size, size);
KUNIT_ASSERT_EQ(test, man->usage, 0);
- KUNIT_ASSERT_NULL(test, man->move);
- KUNIT_ASSERT_NOT_NULL(test, &man->move_lock);
+ for (i = 0; i < TTM_NUM_MOVE_FENCES; i++)
+ KUNIT_ASSERT_NULL(test, man->eviction_fences[i]);
for (int i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
KUNIT_ASSERT_TRUE(test, list_empty(&man->lru[i]));
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index f4d9e68b21e7..0b3732ed6f6c 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -658,34 +658,35 @@ void ttm_bo_unpin(struct ttm_buffer_object *bo)
EXPORT_SYMBOL(ttm_bo_unpin);
/*
- * Add the last move fence to the BO as kernel dependency and reserve a new
- * fence slot.
+ * Add the pipelined eviction fences to the BO as kernel dependency and reserve new
+ * fence slots.
*/
-static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo,
- struct ttm_resource_manager *man,
- bool no_wait_gpu)
+static int ttm_bo_add_pipelined_eviction_fences(struct ttm_buffer_object *bo,
+ struct ttm_resource_manager *man,
+ bool no_wait_gpu)
{
struct dma_fence *fence;
- int ret;
+ int i;
- spin_lock(&man->move_lock);
- fence = dma_fence_get(man->move);
- spin_unlock(&man->move_lock);
+ spin_lock(&man->eviction_lock);
+ for (i = 0; i < TTM_NUM_MOVE_FENCES; i++) {
+ fence = man->eviction_fences[i];
+ if (!fence)
+ continue;
- if (!fence)
- return 0;
-
- if (no_wait_gpu) {
- ret = dma_fence_is_signaled(fence) ? 0 : -EBUSY;
- dma_fence_put(fence);
- return ret;
+ if (no_wait_gpu) {
+ if (!dma_fence_is_signaled(fence)) {
+ spin_unlock(&man->eviction_lock);
+ return -EBUSY;
+ }
+ } else {
+ dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_KERNEL);
+ }
}
+ spin_unlock(&man->eviction_lock);
- dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_KERNEL);
-
- ret = dma_resv_reserve_fences(bo->base.resv, 1);
- dma_fence_put(fence);
- return ret;
+ /* TODO: this call should be removed. */
+ return dma_resv_reserve_fences(bo->base.resv, 1);
}
/**
@@ -718,7 +719,7 @@ static int ttm_bo_alloc_resource(struct ttm_buffer_object *bo,
int i, ret;
ticket = dma_resv_locking_ctx(bo->base.resv);
- ret = dma_resv_reserve_fences(bo->base.resv, 1);
+ ret = dma_resv_reserve_fences(bo->base.resv, TTM_NUM_MOVE_FENCES);
if (unlikely(ret))
return ret;
@@ -757,7 +758,7 @@ static int ttm_bo_alloc_resource(struct ttm_buffer_object *bo,
return ret;
}
- ret = ttm_bo_add_move_fence(bo, man, ctx->no_wait_gpu);
+ ret = ttm_bo_add_pipelined_eviction_fences(bo, man, ctx->no_wait_gpu);
if (unlikely(ret)) {
ttm_resource_free(bo, res);
if (ret == -EBUSY)
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index acbbca9d5c92..2ff35d55e462 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -258,7 +258,7 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
ret = dma_resv_trylock(&fbo->base.base._resv);
WARN_ON(!ret);
- ret = dma_resv_reserve_fences(&fbo->base.base._resv, 1);
+ ret = dma_resv_reserve_fences(&fbo->base.base._resv, TTM_NUM_MOVE_FENCES);
if (ret) {
dma_resv_unlock(&fbo->base.base._resv);
kfree(fbo);
@@ -646,20 +646,44 @@ static void ttm_bo_move_pipeline_evict(struct ttm_buffer_object *bo,
{
struct ttm_device *bdev = bo->bdev;
struct ttm_resource_manager *from;
+ struct dma_fence *tmp;
+ int i;
from = ttm_manager_type(bdev, bo->resource->mem_type);
/**
* BO doesn't have a TTM we need to bind/unbind. Just remember
- * this eviction and free up the allocation
+ * this eviction and free up the allocation.
+ * The fence will be saved in the first free slot or in the slot
+ * already used to store a fence from the same context. Since
+ * drivers can't use more than TTM_NUM_MOVE_FENCES contexts for
+ * evictions we should always find a slot to use.
*/
- spin_lock(&from->move_lock);
- if (!from->move || dma_fence_is_later(fence, from->move)) {
- dma_fence_put(from->move);
- from->move = dma_fence_get(fence);
+ spin_lock(&from->eviction_lock);
+ for (i = 0; i < TTM_NUM_MOVE_FENCES; i++) {
+ tmp = from->eviction_fences[i];
+ if (!tmp)
+ break;
+ if (fence->context != tmp->context)
+ continue;
+ if (dma_fence_is_later(fence, tmp)) {
+ dma_fence_put(tmp);
+ break;
+ }
+ goto unlock;
+ }
+ if (i < TTM_NUM_MOVE_FENCES) {
+ from->eviction_fences[i] = dma_fence_get(fence);
+ } else {
+ WARN(1, "not enough fence slots for all fence contexts");
+ spin_unlock(&from->eviction_lock);
+ dma_fence_wait(fence, false);
+ goto end;
}
- spin_unlock(&from->move_lock);
+unlock:
+ spin_unlock(&from->eviction_lock);
+end:
ttm_resource_free(bo, &bo->resource);
}
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
index e2c82ad07eb4..62c34cafa387 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -523,14 +523,15 @@ void ttm_resource_manager_init(struct ttm_resource_manager *man,
{
unsigned i;
- spin_lock_init(&man->move_lock);
man->bdev = bdev;
man->size = size;
man->usage = 0;
for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
INIT_LIST_HEAD(&man->lru[i]);
- man->move = NULL;
+ spin_lock_init(&man->eviction_lock);
+ for (i = 0; i < TTM_NUM_MOVE_FENCES; i++)
+ man->eviction_fences[i] = NULL;
}
EXPORT_SYMBOL(ttm_resource_manager_init);
@@ -551,7 +552,7 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
.no_wait_gpu = false,
};
struct dma_fence *fence;
- int ret;
+ int ret, i;
do {
ret = ttm_bo_evict_first(bdev, man, &ctx);
@@ -561,18 +562,24 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
if (ret && ret != -ENOENT)
return ret;
- spin_lock(&man->move_lock);
- fence = dma_fence_get(man->move);
- spin_unlock(&man->move_lock);
+ ret = 0;
- if (fence) {
- ret = dma_fence_wait(fence, false);
- dma_fence_put(fence);
- if (ret)
- return ret;
+ spin_lock(&man->eviction_lock);
+ for (i = 0; i < TTM_NUM_MOVE_FENCES; i++) {
+ fence = man->eviction_fences[i];
+ if (fence && !dma_fence_is_signaled(fence)) {
+ dma_fence_get(fence);
+ spin_unlock(&man->eviction_lock);
+ ret = dma_fence_wait(fence, false);
+ dma_fence_put(fence);
+ if (ret)
+ return ret;
+ spin_lock(&man->eviction_lock);
+ }
}
+ spin_unlock(&man->eviction_lock);
- return 0;
+ return ret;
}
EXPORT_SYMBOL(ttm_resource_manager_evict_all);
diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h
index f49daa504c36..50e6added509 100644
--- a/include/drm/ttm/ttm_resource.h
+++ b/include/drm/ttm/ttm_resource.h
@@ -50,6 +50,15 @@ struct io_mapping;
struct sg_table;
struct scatterlist;
+/**
+ * define TTM_NUM_MOVE_FENCES - How many entities can be used for evictions
+ *
+ * Pipelined evictions can be spread on multiple entities. This
+ * is the max number of entities that can be used by the driver
+ * for that purpose.
+ */
+#define TTM_NUM_MOVE_FENCES 8
+
/**
* enum ttm_lru_item_type - enumerate ttm_lru_item subclasses
*/
@@ -180,8 +189,8 @@ struct ttm_resource_manager_func {
* @size: Size of the managed region.
* @bdev: ttm device this manager belongs to
* @func: structure pointer implementing the range manager. See above
- * @move_lock: lock for move fence
- * @move: The fence of the last pipelined move operation.
+ * @eviction_lock: lock for eviction fences
+ * @eviction_fences: The fences of the last pipelined move operation.
* @lru: The lru list for this memory type.
*
* This structure is used to identify and manage memory types for a device.
@@ -195,12 +204,12 @@ struct ttm_resource_manager {
struct ttm_device *bdev;
uint64_t size;
const struct ttm_resource_manager_func *func;
- spinlock_t move_lock;
- /*
- * Protected by @move_lock.
+ /* This is very similar to a dma_resv object, but locking rules make
+ * it difficult to use one in this context.
*/
- struct dma_fence *move;
+ spinlock_t eviction_lock;
+ struct dma_fence *eviction_fences[TTM_NUM_MOVE_FENCES];
/*
* Protected by the bdev->lru_lock.
@@ -421,8 +430,12 @@ static inline bool ttm_resource_manager_used(struct ttm_resource_manager *man)
static inline void
ttm_resource_manager_cleanup(struct ttm_resource_manager *man)
{
- dma_fence_put(man->move);
- man->move = NULL;
+ int i;
+
+ for (i = 0; i < TTM_NUM_MOVE_FENCES; i++) {
+ dma_fence_put(man->eviction_fences[i]);
+ man->eviction_fences[i] = NULL;
+ }
}
void ttm_lru_bulk_move_init(struct ttm_lru_bulk_move *bulk);
--
2.43.0
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: [PATCH v2 02/20] drm/ttm: rework pipelined eviction fence handling
2025-11-13 16:05 ` [PATCH v2 02/20] drm/ttm: rework pipelined eviction fence handling Pierre-Eric Pelloux-Prayer
@ 2025-11-14 12:47 ` Christian König
2025-11-18 15:00 ` Thomas Hellström
1 sibling, 0 replies; 20+ messages in thread
From: Christian König @ 2025-11-14 12:47 UTC (permalink / raw)
To: Pierre-Eric Pelloux-Prayer, Alex Deucher, David Airlie,
Simona Vetter, Huang Rui, Matthew Auld, Matthew Brost,
Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann, Sumit Semwal
Cc: amd-gfx, dri-devel, linux-kernel, linux-media, linaro-mm-sig,
open list:DMA BUFFER SHARING FRAMEWORK:Keyword:\bdma_(?:buf|fence|resv)\b
On 11/13/25 17:05, Pierre-Eric Pelloux-Prayer wrote:
> Until now ttm stored a single pipelined eviction fence which means
> drivers had to use a single entity for these evictions.
>
> To lift this requirement, this commit allows up to 8 entities to
> be used.
>
> Ideally a dma_resv object would have been used as a container of
> the eviction fences, but the locking rules makes it complex.
> dma_resv all have the same ww_class, which means "Attempting to
> lock more mutexes after ww_acquire_done." is an error.
>
> One alternative considered was to introduced a 2nd ww_class for
> specific resv to hold a single "transient" lock (= the resv lock
> would only be held for a short period, without taking any other
> locks).
>
> The other option, is to statically reserve a fence array, and
> extend the existing code to deal with N fences, instead of 1.
>
> The driver is still responsible to reserve the correct number
> of fence slots.
>
> ---
> v2:
> - simplified code
> - dropped n_fences
> - name changes
> ---
>
> Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 8 ++--
> .../gpu/drm/ttm/tests/ttm_bo_validate_test.c | 11 +++--
> drivers/gpu/drm/ttm/tests/ttm_resource_test.c | 5 +-
> drivers/gpu/drm/ttm/ttm_bo.c | 47 ++++++++++---------
> drivers/gpu/drm/ttm/ttm_bo_util.c | 38 ++++++++++++---
> drivers/gpu/drm/ttm/ttm_resource.c | 31 +++++++-----
> include/drm/ttm/ttm_resource.h | 29 ++++++++----
> 7 files changed, 109 insertions(+), 60 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 326476089db3..3b46a24a8c48 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -2156,7 +2156,7 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
> {
> struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
> uint64_t size;
> - int r;
> + int r, i;
>
> if (!adev->mman.initialized || amdgpu_in_reset(adev) ||
> adev->mman.buffer_funcs_enabled == enable || adev->gmc.is_app_apu)
> @@ -2190,8 +2190,10 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
> } else {
> drm_sched_entity_destroy(&adev->mman.high_pr);
> drm_sched_entity_destroy(&adev->mman.low_pr);
> - dma_fence_put(man->move);
> - man->move = NULL;
> + for (i = 0; i < TTM_NUM_MOVE_FENCES; i++) {
> + dma_fence_put(man->eviction_fences[i]);
> + man->eviction_fences[i] = NULL;
> + }
That code should have been a TTM function in the first place.
I suggest just calling ttm_resource_manager_cleanup() here instead and adding this as a comment:
/* Drop all the old fences since re-creating the scheduler entities will allocate new contexts */
Apart from that looks good to me.
Regards,
Christian.
> }
>
> /* this just adjusts TTM size idea, which sets lpfn to the correct value */
> diff --git a/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c b/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c
> index 3148f5d3dbd6..8f71906c4238 100644
> --- a/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c
> +++ b/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c
> @@ -651,7 +651,7 @@ static void ttm_bo_validate_move_fence_signaled(struct kunit *test)
> int err;
>
> man = ttm_manager_type(priv->ttm_dev, mem_type);
> - man->move = dma_fence_get_stub();
> + man->eviction_fences[0] = dma_fence_get_stub();
>
> bo = ttm_bo_kunit_init(test, test->priv, size, NULL);
> bo->type = bo_type;
> @@ -668,7 +668,7 @@ static void ttm_bo_validate_move_fence_signaled(struct kunit *test)
> KUNIT_EXPECT_EQ(test, ctx.bytes_moved, size);
>
> ttm_bo_put(bo);
> - dma_fence_put(man->move);
> + dma_fence_put(man->eviction_fences[0]);
> }
>
> static const struct ttm_bo_validate_test_case ttm_bo_validate_wait_cases[] = {
> @@ -732,9 +732,9 @@ static void ttm_bo_validate_move_fence_not_signaled(struct kunit *test)
>
> spin_lock_init(&fence_lock);
> man = ttm_manager_type(priv->ttm_dev, fst_mem);
> - man->move = alloc_mock_fence(test);
> + man->eviction_fences[0] = alloc_mock_fence(test);
>
> - task = kthread_create(threaded_fence_signal, man->move, "move-fence-signal");
> + task = kthread_create(threaded_fence_signal, man->eviction_fences[0], "move-fence-signal");
> if (IS_ERR(task))
> KUNIT_FAIL(test, "Couldn't create move fence signal task\n");
>
> @@ -742,7 +742,8 @@ static void ttm_bo_validate_move_fence_not_signaled(struct kunit *test)
> err = ttm_bo_validate(bo, placement_val, &ctx_val);
> dma_resv_unlock(bo->base.resv);
>
> - dma_fence_wait_timeout(man->move, false, MAX_SCHEDULE_TIMEOUT);
> + dma_fence_wait_timeout(man->eviction_fences[0], false, MAX_SCHEDULE_TIMEOUT);
> + man->eviction_fences[0] = NULL;
>
> KUNIT_EXPECT_EQ(test, err, 0);
> KUNIT_EXPECT_EQ(test, ctx_val.bytes_moved, size);
> diff --git a/drivers/gpu/drm/ttm/tests/ttm_resource_test.c b/drivers/gpu/drm/ttm/tests/ttm_resource_test.c
> index e6ea2bd01f07..c0e4e35e0442 100644
> --- a/drivers/gpu/drm/ttm/tests/ttm_resource_test.c
> +++ b/drivers/gpu/drm/ttm/tests/ttm_resource_test.c
> @@ -207,6 +207,7 @@ static void ttm_resource_manager_init_basic(struct kunit *test)
> struct ttm_resource_test_priv *priv = test->priv;
> struct ttm_resource_manager *man;
> size_t size = SZ_16K;
> + int i;
>
> man = kunit_kzalloc(test, sizeof(*man), GFP_KERNEL);
> KUNIT_ASSERT_NOT_NULL(test, man);
> @@ -216,8 +217,8 @@ static void ttm_resource_manager_init_basic(struct kunit *test)
> KUNIT_ASSERT_PTR_EQ(test, man->bdev, priv->devs->ttm_dev);
> KUNIT_ASSERT_EQ(test, man->size, size);
> KUNIT_ASSERT_EQ(test, man->usage, 0);
> - KUNIT_ASSERT_NULL(test, man->move);
> - KUNIT_ASSERT_NOT_NULL(test, &man->move_lock);
> + for (i = 0; i < TTM_NUM_MOVE_FENCES; i++)
> + KUNIT_ASSERT_NULL(test, man->eviction_fences[i]);
>
> for (int i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
> KUNIT_ASSERT_TRUE(test, list_empty(&man->lru[i]));
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index f4d9e68b21e7..0b3732ed6f6c 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -658,34 +658,35 @@ void ttm_bo_unpin(struct ttm_buffer_object *bo)
> EXPORT_SYMBOL(ttm_bo_unpin);
>
> /*
> - * Add the last move fence to the BO as kernel dependency and reserve a new
> - * fence slot.
> + * Add the pipelined eviction fencesto the BO as kernel dependency and reserve new
> + * fence slots.
> */
> -static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo,
> - struct ttm_resource_manager *man,
> - bool no_wait_gpu)
> +static int ttm_bo_add_pipelined_eviction_fences(struct ttm_buffer_object *bo,
> + struct ttm_resource_manager *man,
> + bool no_wait_gpu)
> {
> struct dma_fence *fence;
> - int ret;
> + int i;
>
> - spin_lock(&man->move_lock);
> - fence = dma_fence_get(man->move);
> - spin_unlock(&man->move_lock);
> + spin_lock(&man->eviction_lock);
> + for (i = 0; i < TTM_NUM_MOVE_FENCES; i++) {
> + fence = man->eviction_fences[i];
> + if (!fence)
> + continue;
>
> - if (!fence)
> - return 0;
> -
> - if (no_wait_gpu) {
> - ret = dma_fence_is_signaled(fence) ? 0 : -EBUSY;
> - dma_fence_put(fence);
> - return ret;
> + if (no_wait_gpu) {
> + if (!dma_fence_is_signaled(fence)) {
> + spin_unlock(&man->eviction_lock);
> + return -EBUSY;
> + }
> + } else {
> + dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_KERNEL);
> + }
> }
> + spin_unlock(&man->eviction_lock);
>
> - dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_KERNEL);
> -
> - ret = dma_resv_reserve_fences(bo->base.resv, 1);
> - dma_fence_put(fence);
> - return ret;
> + /* TODO: this call should be removed. */
> + return dma_resv_reserve_fences(bo->base.resv, 1);
> }
>
> /**
> @@ -718,7 +719,7 @@ static int ttm_bo_alloc_resource(struct ttm_buffer_object *bo,
> int i, ret;
>
> ticket = dma_resv_locking_ctx(bo->base.resv);
> - ret = dma_resv_reserve_fences(bo->base.resv, 1);
> + ret = dma_resv_reserve_fences(bo->base.resv, TTM_NUM_MOVE_FENCES);
> if (unlikely(ret))
> return ret;
>
> @@ -757,7 +758,7 @@ static int ttm_bo_alloc_resource(struct ttm_buffer_object *bo,
> return ret;
> }
>
> - ret = ttm_bo_add_move_fence(bo, man, ctx->no_wait_gpu);
> + ret = ttm_bo_add_pipelined_eviction_fences(bo, man, ctx->no_wait_gpu);
> if (unlikely(ret)) {
> ttm_resource_free(bo, res);
> if (ret == -EBUSY)
> diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
> index acbbca9d5c92..2ff35d55e462 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo_util.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
> @@ -258,7 +258,7 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
> ret = dma_resv_trylock(&fbo->base.base._resv);
> WARN_ON(!ret);
>
> - ret = dma_resv_reserve_fences(&fbo->base.base._resv, 1);
> + ret = dma_resv_reserve_fences(&fbo->base.base._resv, TTM_NUM_MOVE_FENCES);
> if (ret) {
> dma_resv_unlock(&fbo->base.base._resv);
> kfree(fbo);
> @@ -646,20 +646,44 @@ static void ttm_bo_move_pipeline_evict(struct ttm_buffer_object *bo,
> {
> struct ttm_device *bdev = bo->bdev;
> struct ttm_resource_manager *from;
> + struct dma_fence *tmp;
> + int i;
>
> from = ttm_manager_type(bdev, bo->resource->mem_type);
>
> /**
> * BO doesn't have a TTM we need to bind/unbind. Just remember
> - * this eviction and free up the allocation
> + * this eviction and free up the allocation.
> + * The fence will be saved in the first free slot or in the slot
> + * already used to store a fence from the same context. Since
> + * drivers can't use more than TTM_NUM_MOVE_FENCES contexts for
> + * evictions we should always find a slot to use.
> */
> - spin_lock(&from->move_lock);
> - if (!from->move || dma_fence_is_later(fence, from->move)) {
> - dma_fence_put(from->move);
> - from->move = dma_fence_get(fence);
> + spin_lock(&from->eviction_lock);
> + for (i = 0; i < TTM_NUM_MOVE_FENCES; i++) {
> + tmp = from->eviction_fences[i];
> + if (!tmp)
> + break;
> + if (fence->context != tmp->context)
> + continue;
> + if (dma_fence_is_later(fence, tmp)) {
> + dma_fence_put(tmp);
> + break;
> + }
> + goto unlock;
> + }
> + if (i < TTM_NUM_MOVE_FENCES) {
> + from->eviction_fences[i] = dma_fence_get(fence);
> + } else {
> + WARN(1, "not enough fence slots for all fence contexts");
> + spin_unlock(&from->eviction_lock);
> + dma_fence_wait(fence, false);
> + goto end;
> }
> - spin_unlock(&from->move_lock);
>
> +unlock:
> + spin_unlock(&from->eviction_lock);
> +end:
> ttm_resource_free(bo, &bo->resource);
> }
>
> diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
> index e2c82ad07eb4..62c34cafa387 100644
> --- a/drivers/gpu/drm/ttm/ttm_resource.c
> +++ b/drivers/gpu/drm/ttm/ttm_resource.c
> @@ -523,14 +523,15 @@ void ttm_resource_manager_init(struct ttm_resource_manager *man,
> {
> unsigned i;
>
> - spin_lock_init(&man->move_lock);
> man->bdev = bdev;
> man->size = size;
> man->usage = 0;
>
> for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
> INIT_LIST_HEAD(&man->lru[i]);
> - man->move = NULL;
> + spin_lock_init(&man->eviction_lock);
> + for (i = 0; i < TTM_NUM_MOVE_FENCES; i++)
> + man->eviction_fences[i] = NULL;
> }
> EXPORT_SYMBOL(ttm_resource_manager_init);
>
> @@ -551,7 +552,7 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
> .no_wait_gpu = false,
> };
> struct dma_fence *fence;
> - int ret;
> + int ret, i;
>
> do {
> ret = ttm_bo_evict_first(bdev, man, &ctx);
> @@ -561,18 +562,24 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
> if (ret && ret != -ENOENT)
> return ret;
>
> - spin_lock(&man->move_lock);
> - fence = dma_fence_get(man->move);
> - spin_unlock(&man->move_lock);
> + ret = 0;
>
> - if (fence) {
> - ret = dma_fence_wait(fence, false);
> - dma_fence_put(fence);
> - if (ret)
> - return ret;
> + spin_lock(&man->eviction_lock);
> + for (i = 0; i < TTM_NUM_MOVE_FENCES; i++) {
> + fence = man->eviction_fences[i];
> + if (fence && !dma_fence_is_signaled(fence)) {
> + dma_fence_get(fence);
> + spin_unlock(&man->eviction_lock);
> + ret = dma_fence_wait(fence, false);
> + dma_fence_put(fence);
> + if (ret)
> + return ret;
> + spin_lock(&man->eviction_lock);
> + }
> }
> + spin_unlock(&man->eviction_lock);
>
> - return 0;
> + return ret;
> }
> EXPORT_SYMBOL(ttm_resource_manager_evict_all);
>
> diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h
> index f49daa504c36..50e6added509 100644
> --- a/include/drm/ttm/ttm_resource.h
> +++ b/include/drm/ttm/ttm_resource.h
> @@ -50,6 +50,15 @@ struct io_mapping;
> struct sg_table;
> struct scatterlist;
>
> +/**
> + * define TTM_NUM_MOVE_FENCES - How many entities can be used for evictions
> + *
> + * Pipelined evictions can be spread on multiple entities. This
> + * is the max number of entities that can be used by the driver
> + * for that purpose.
> + */
> +#define TTM_NUM_MOVE_FENCES 8
> +
> /**
> * enum ttm_lru_item_type - enumerate ttm_lru_item subclasses
> */
> @@ -180,8 +189,8 @@ struct ttm_resource_manager_func {
> * @size: Size of the managed region.
> * @bdev: ttm device this manager belongs to
> * @func: structure pointer implementing the range manager. See above
> - * @move_lock: lock for move fence
> - * @move: The fence of the last pipelined move operation.
> + * @eviction_lock: lock for eviction fences
> + * @eviction_fences: The fences of the last pipelined move operation.
> * @lru: The lru list for this memory type.
> *
> * This structure is used to identify and manage memory types for a device.
> @@ -195,12 +204,12 @@ struct ttm_resource_manager {
> struct ttm_device *bdev;
> uint64_t size;
> const struct ttm_resource_manager_func *func;
> - spinlock_t move_lock;
>
> - /*
> - * Protected by @move_lock.
> + /* This is very similar to a dma_resv object, but locking rules make
> + * it difficult to use one in this context.
> */
> - struct dma_fence *move;
> + spinlock_t eviction_lock;
> + struct dma_fence *eviction_fences[TTM_NUM_MOVE_FENCES];
>
> /*
> * Protected by the bdev->lru_lock.
> @@ -421,8 +430,12 @@ static inline bool ttm_resource_manager_used(struct ttm_resource_manager *man)
> static inline void
> ttm_resource_manager_cleanup(struct ttm_resource_manager *man)
> {
> - dma_fence_put(man->move);
> - man->move = NULL;
> + int i;
> +
> + for (i = 0; i < TTM_NUM_MOVE_FENCES; i++) {
> + dma_fence_put(man->eviction_fences[i]);
> + man->eviction_fences[i] = NULL;
> + }
> }
>
> void ttm_lru_bulk_move_init(struct ttm_lru_bulk_move *bulk);
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH v2 02/20] drm/ttm: rework pipelined eviction fence handling
2025-11-13 16:05 ` [PATCH v2 02/20] drm/ttm: rework pipelined eviction fence handling Pierre-Eric Pelloux-Prayer
2025-11-14 12:47 ` Christian König
@ 2025-11-18 15:00 ` Thomas Hellström
2025-11-19 14:57 ` Christian König
1 sibling, 1 reply; 20+ messages in thread
From: Thomas Hellström @ 2025-11-18 15:00 UTC (permalink / raw)
To: Pierre-Eric Pelloux-Prayer, Alex Deucher, Christian König,
David Airlie, Simona Vetter, Huang Rui, Matthew Auld,
Matthew Brost, Maarten Lankhorst, Maxime Ripard,
Thomas Zimmermann, Sumit Semwal
Cc: amd-gfx, dri-devel, linux-kernel, linux-media, linaro-mm-sig
Hi, Pierre-Eric
On Thu, 2025-11-13 at 17:05 +0100, Pierre-Eric Pelloux-Prayer wrote:
> Until now ttm stored a single pipelined eviction fence which means
> drivers had to use a single entity for these evictions.
>
> To lift this requirement, this commit allows up to 8 entities to
> be used.
>
> Ideally a dma_resv object would have been used as a container of
> the eviction fences, but the locking rules makes it complex.
> dma_resv all have the same ww_class, which means "Attempting to
> lock more mutexes after ww_acquire_done." is an error.
>
> One alternative considered was to introduced a 2nd ww_class for
> specific resv to hold a single "transient" lock (= the resv lock
> would only be held for a short period, without taking any other
> locks).
Wouldn't it be possible to use lockdep_set_class_and_name() to modify
the resv lock class for these particular resv objects after they are
allocated? Reusing the resv code certainly sounds attractive.
Thanks,
Thomas
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH v2 02/20] drm/ttm: rework pipelined eviction fence handling
2025-11-18 15:00 ` Thomas Hellström
@ 2025-11-19 14:57 ` Christian König
0 siblings, 0 replies; 20+ messages in thread
From: Christian König @ 2025-11-19 14:57 UTC (permalink / raw)
To: Thomas Hellström, Pierre-Eric Pelloux-Prayer, Alex Deucher,
David Airlie, Simona Vetter, Huang Rui, Matthew Auld,
Matthew Brost, Maarten Lankhorst, Maxime Ripard,
Thomas Zimmermann, Sumit Semwal
Cc: amd-gfx, dri-devel, linux-kernel, linux-media, linaro-mm-sig
On 11/18/25 16:00, Thomas Hellström wrote:
> Hi, Pierre-Eric
>
> On Thu, 2025-11-13 at 17:05 +0100, Pierre-Eric Pelloux-Prayer wrote:
>> Until now ttm stored a single pipelined eviction fence which means
>> drivers had to use a single entity for these evictions.
>>
>> To lift this requirement, this commit allows up to 8 entities to
>> be used.
>>
>> Ideally a dma_resv object would have been used as a container of
>> the eviction fences, but the locking rules makes it complex.
>> dma_resv all have the same ww_class, which means "Attempting to
>> lock more mutexes after ww_acquire_done." is an error.
>>
>> One alternative considered was to introduce a 2nd ww_class for
>> specific resv to hold a single "transient" lock (= the resv lock
>> would only be held for a short period, without taking any other
>> locks).
>
> Wouldn't it be possible to use lockdep_set_class_and_name() to modify
> the resv lock class for these particular resv objects after they are
> allocated? Reusing the resv code certainly sounds attractive.
Even if we can convince lockdep that this is unproblematic, I don't think re-using the dma_resv code here is a good idea.
We should avoid dynamic memory allocation as much as possible, and a static array seems to do the job just fine.
Regards,
Christian.
>
> Thanks,
> Thomas
>
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH v2 03/20] drm/amdgpu: remove direct_submit arg from amdgpu_copy_buffer
2025-11-13 16:05 [PATCH v2 00/20] drm/amdgpu: use all SDMA instances for TTM clears and moves Pierre-Eric Pelloux-Prayer
2025-11-13 16:05 ` [PATCH v2 02/20] drm/ttm: rework pipelined eviction fence handling Pierre-Eric Pelloux-Prayer
@ 2025-11-13 16:05 ` Pierre-Eric Pelloux-Prayer
2025-11-14 12:48 ` Christian König
2025-11-13 16:05 ` [PATCH v2 05/20] drm/amdgpu: pass the entity to use to ttm functions Pierre-Eric Pelloux-Prayer
` (4 subsequent siblings)
6 siblings, 1 reply; 20+ messages in thread
From: Pierre-Eric Pelloux-Prayer @ 2025-11-13 16:05 UTC (permalink / raw)
To: Alex Deucher, Christian König, David Airlie, Simona Vetter,
Felix Kuehling, Sumit Semwal
Cc: Pierre-Eric Pelloux-Prayer, amd-gfx, dri-devel, linux-kernel,
linux-media, linaro-mm-sig
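The direct_submit argument was always false at every call site, so remove it
together with the now-unused direct submission path.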
Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c | 2 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 20 +++++++------------
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 2 +-
drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 2 +-
4 files changed, 10 insertions(+), 16 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
index 199693369c7c..02c2479a8840 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
@@ -39,7 +39,7 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
for (i = 0; i < n; i++) {
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
r = amdgpu_copy_buffer(ring, saddr, daddr, size, NULL, &fence,
- false, false, 0);
+ false, 0);
if (r)
goto exit_do_move;
r = dma_fence_wait(fence, false);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 3b46a24a8c48..c985f57fa227 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -354,7 +354,7 @@ static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
}
r = amdgpu_copy_buffer(ring, from, to, cur_size, resv,
- &next, false, true, copy_flags);
+ &next, true, copy_flags);
if (r)
goto error;
@@ -2211,16 +2211,13 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
}
static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
- bool direct_submit,
unsigned int num_dw,
struct dma_resv *resv,
bool vm_needs_flush,
struct amdgpu_job **job,
bool delayed, u64 k_job_id)
{
- enum amdgpu_ib_pool_type pool = direct_submit ?
- AMDGPU_IB_POOL_DIRECT :
- AMDGPU_IB_POOL_DELAYED;
+ enum amdgpu_ib_pool_type pool = AMDGPU_IB_POOL_DELAYED;
int r;
struct drm_sched_entity *entity = delayed ? &adev->mman.low_pr :
&adev->mman.high_pr;
@@ -2246,7 +2243,7 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
uint64_t dst_offset, uint32_t byte_count,
struct dma_resv *resv,
- struct dma_fence **fence, bool direct_submit,
+ struct dma_fence **fence,
bool vm_needs_flush, uint32_t copy_flags)
{
struct amdgpu_device *adev = ring->adev;
@@ -2256,7 +2253,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
unsigned int i;
int r;
- if (!direct_submit && !ring->sched.ready) {
+ if (!ring->sched.ready) {
dev_err(adev->dev,
"Trying to move memory with ring turned off.\n");
return -EINVAL;
@@ -2265,7 +2262,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
max_bytes = adev->mman.buffer_funcs->copy_max_bytes;
num_loops = DIV_ROUND_UP(byte_count, max_bytes);
num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8);
- r = amdgpu_ttm_prepare_job(adev, direct_submit, num_dw,
+ r = amdgpu_ttm_prepare_job(adev, num_dw,
resv, vm_needs_flush, &job, false,
AMDGPU_KERNEL_JOB_ID_TTM_COPY_BUFFER);
if (r)
@@ -2283,10 +2280,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
amdgpu_ring_pad_ib(ring, &job->ibs[0]);
WARN_ON(job->ibs[0].length_dw > num_dw);
- if (direct_submit)
- r = amdgpu_job_submit_direct(job, ring, fence);
- else
- *fence = amdgpu_job_submit(job);
+ *fence = amdgpu_job_submit(job);
if (r)
goto error_free;
@@ -2315,7 +2309,7 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
max_bytes = adev->mman.buffer_funcs->fill_max_bytes;
num_loops = DIV_ROUND_UP_ULL(byte_count, max_bytes);
num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->fill_num_dw, 8);
- r = amdgpu_ttm_prepare_job(adev, false, num_dw, resv, vm_needs_flush,
+ r = amdgpu_ttm_prepare_job(adev, num_dw, resv, vm_needs_flush,
&job, delayed, k_job_id);
if (r)
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 577ee04ce0bf..50e40380fe95 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -166,7 +166,7 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev,
int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
uint64_t dst_offset, uint32_t byte_count,
struct dma_resv *resv,
- struct dma_fence **fence, bool direct_submit,
+ struct dma_fence **fence,
bool vm_needs_flush, uint32_t copy_flags);
int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
struct dma_resv *resv,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 46c84fc60af1..378af0b2aaa9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -153,7 +153,7 @@ svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys,
}
r = amdgpu_copy_buffer(ring, gart_s, gart_d, size * PAGE_SIZE,
- NULL, &next, false, true, 0);
+ NULL, &next, true, 0);
if (r) {
dev_err(adev->dev, "fail %d to copy memory\n", r);
goto out_unlock;
--
2.43.0
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: [PATCH v2 03/20] drm/amdgpu: remove direct_submit arg from amdgpu_copy_buffer
2025-11-13 16:05 ` [PATCH v2 03/20] drm/amdgpu: remove direct_submit arg from amdgpu_copy_buffer Pierre-Eric Pelloux-Prayer
@ 2025-11-14 12:48 ` Christian König
0 siblings, 0 replies; 20+ messages in thread
From: Christian König @ 2025-11-14 12:48 UTC (permalink / raw)
To: Pierre-Eric Pelloux-Prayer, Alex Deucher, David Airlie,
Simona Vetter, Felix Kuehling, Sumit Semwal
Cc: amd-gfx, dri-devel, linux-kernel, linux-media, linaro-mm-sig
On 11/13/25 17:05, Pierre-Eric Pelloux-Prayer wrote:
> It was always false.
>
> Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
> Reviewed-by: Christian König <christian.koenig@amd.com>
Please push to amd-staging-drm-next.
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c | 2 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 20 +++++++------------
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 2 +-
> drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 2 +-
> 4 files changed, 10 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
> index 199693369c7c..02c2479a8840 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
> @@ -39,7 +39,7 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
> for (i = 0; i < n; i++) {
> struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
> r = amdgpu_copy_buffer(ring, saddr, daddr, size, NULL, &fence,
> - false, false, 0);
> + false, 0);
> if (r)
> goto exit_do_move;
> r = dma_fence_wait(fence, false);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 3b46a24a8c48..c985f57fa227 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -354,7 +354,7 @@ static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
> }
>
> r = amdgpu_copy_buffer(ring, from, to, cur_size, resv,
> - &next, false, true, copy_flags);
> + &next, true, copy_flags);
> if (r)
> goto error;
>
> @@ -2211,16 +2211,13 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
> }
>
> static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
> - bool direct_submit,
> unsigned int num_dw,
> struct dma_resv *resv,
> bool vm_needs_flush,
> struct amdgpu_job **job,
> bool delayed, u64 k_job_id)
> {
> - enum amdgpu_ib_pool_type pool = direct_submit ?
> - AMDGPU_IB_POOL_DIRECT :
> - AMDGPU_IB_POOL_DELAYED;
> + enum amdgpu_ib_pool_type pool = AMDGPU_IB_POOL_DELAYED;
> int r;
> struct drm_sched_entity *entity = delayed ? &adev->mman.low_pr :
> &adev->mman.high_pr;
> @@ -2246,7 +2243,7 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
> int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
> uint64_t dst_offset, uint32_t byte_count,
> struct dma_resv *resv,
> - struct dma_fence **fence, bool direct_submit,
> + struct dma_fence **fence,
> bool vm_needs_flush, uint32_t copy_flags)
> {
> struct amdgpu_device *adev = ring->adev;
> @@ -2256,7 +2253,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
> unsigned int i;
> int r;
>
> - if (!direct_submit && !ring->sched.ready) {
> + if (!ring->sched.ready) {
> dev_err(adev->dev,
> "Trying to move memory with ring turned off.\n");
> return -EINVAL;
> @@ -2265,7 +2262,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
> max_bytes = adev->mman.buffer_funcs->copy_max_bytes;
> num_loops = DIV_ROUND_UP(byte_count, max_bytes);
> num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8);
> - r = amdgpu_ttm_prepare_job(adev, direct_submit, num_dw,
> + r = amdgpu_ttm_prepare_job(adev, num_dw,
> resv, vm_needs_flush, &job, false,
> AMDGPU_KERNEL_JOB_ID_TTM_COPY_BUFFER);
> if (r)
> @@ -2283,10 +2280,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
>
> amdgpu_ring_pad_ib(ring, &job->ibs[0]);
> WARN_ON(job->ibs[0].length_dw > num_dw);
> - if (direct_submit)
> - r = amdgpu_job_submit_direct(job, ring, fence);
> - else
> - *fence = amdgpu_job_submit(job);
> + *fence = amdgpu_job_submit(job);
> if (r)
> goto error_free;
>
> @@ -2315,7 +2309,7 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
> max_bytes = adev->mman.buffer_funcs->fill_max_bytes;
> num_loops = DIV_ROUND_UP_ULL(byte_count, max_bytes);
> num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->fill_num_dw, 8);
> - r = amdgpu_ttm_prepare_job(adev, false, num_dw, resv, vm_needs_flush,
> + r = amdgpu_ttm_prepare_job(adev, num_dw, resv, vm_needs_flush,
> &job, delayed, k_job_id);
> if (r)
> return r;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> index 577ee04ce0bf..50e40380fe95 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> @@ -166,7 +166,7 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev,
> int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
> uint64_t dst_offset, uint32_t byte_count,
> struct dma_resv *resv,
> - struct dma_fence **fence, bool direct_submit,
> + struct dma_fence **fence,
> bool vm_needs_flush, uint32_t copy_flags);
> int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
> struct dma_resv *resv,
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> index 46c84fc60af1..378af0b2aaa9 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> @@ -153,7 +153,7 @@ svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys,
> }
>
> r = amdgpu_copy_buffer(ring, gart_s, gart_d, size * PAGE_SIZE,
> - NULL, &next, false, true, 0);
> + NULL, &next, true, 0);
> if (r) {
> dev_err(adev->dev, "fail %d to copy memory\n", r);
> goto out_unlock;
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH v2 05/20] drm/amdgpu: pass the entity to use to ttm functions
2025-11-13 16:05 [PATCH v2 00/20] drm/amdgpu: use all SDMA instances for TTM clears and moves Pierre-Eric Pelloux-Prayer
2025-11-13 16:05 ` [PATCH v2 02/20] drm/ttm: rework pipelined eviction fence handling Pierre-Eric Pelloux-Prayer
2025-11-13 16:05 ` [PATCH v2 03/20] drm/amdgpu: remove direct_submit arg from amdgpu_copy_buffer Pierre-Eric Pelloux-Prayer
@ 2025-11-13 16:05 ` Pierre-Eric Pelloux-Prayer
2025-11-14 13:07 ` Christian König
2025-11-14 20:20 ` Felix Kuehling
2025-11-13 16:05 ` [PATCH v2 09/20] drm/amdgpu: pass optional dependency to amdgpu_fill_buffer Pierre-Eric Pelloux-Prayer
` (3 subsequent siblings)
6 siblings, 2 replies; 20+ messages in thread
From: Pierre-Eric Pelloux-Prayer @ 2025-11-13 16:05 UTC (permalink / raw)
To: Alex Deucher, Christian König, David Airlie, Simona Vetter,
Felix Kuehling, Sumit Semwal
Cc: Pierre-Eric Pelloux-Prayer, amd-gfx, dri-devel, linux-kernel,
linux-media, linaro-mm-sig
This way the caller can select the entity it wants to use.
Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c | 3 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 4 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 75 +++++++++++--------
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 16 ++--
drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 3 +-
5 files changed, 60 insertions(+), 41 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
index 02c2479a8840..b59040a8771f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
@@ -38,7 +38,8 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
stime = ktime_get();
for (i = 0; i < n; i++) {
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
- r = amdgpu_copy_buffer(ring, saddr, daddr, size, NULL, &fence,
+ r = amdgpu_copy_buffer(ring, &adev->mman.default_entity.base,
+ saddr, daddr, size, NULL, &fence,
false, 0);
if (r)
goto exit_do_move;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index e08f58de4b17..c06c132a753c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -1321,8 +1321,8 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
if (r)
goto out;
- r = amdgpu_fill_buffer(abo, 0, &bo->base._resv, &fence, true,
- AMDGPU_KERNEL_JOB_ID_CLEAR_ON_RELEASE);
+ r = amdgpu_fill_buffer(&adev->mman.clear_entity, abo, 0, &bo->base._resv,
+ &fence, AMDGPU_KERNEL_JOB_ID_CLEAR_ON_RELEASE);
if (WARN_ON(r))
goto out;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 42d448cd6a6d..c8d59ca2b3bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -164,6 +164,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
/**
* amdgpu_ttm_map_buffer - Map memory into the GART windows
+ * @entity: entity to run the window setup job
* @bo: buffer object to map
* @mem: memory object to map
* @mm_cur: range to map
@@ -176,7 +177,8 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
* Setup one of the GART windows to access a specific piece of memory or return
* the physical address for local memory.
*/
-static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
+static int amdgpu_ttm_map_buffer(struct drm_sched_entity *entity,
+ struct ttm_buffer_object *bo,
struct ttm_resource *mem,
struct amdgpu_res_cursor *mm_cur,
unsigned int window, struct amdgpu_ring *ring,
@@ -224,7 +226,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
num_bytes = num_pages * 8 * AMDGPU_GPU_PAGES_IN_CPU_PAGE;
- r = amdgpu_job_alloc_with_ib(adev, &adev->mman.default_entity.base,
+ r = amdgpu_job_alloc_with_ib(adev, entity,
AMDGPU_FENCE_OWNER_UNDEFINED,
num_dw * 4 + num_bytes,
AMDGPU_IB_POOL_DELAYED, &job,
@@ -274,6 +276,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
/**
* amdgpu_ttm_copy_mem_to_mem - Helper function for copy
* @adev: amdgpu device
+ * @entity: entity to run the jobs
* @src: buffer/address where to read from
* @dst: buffer/address where to write to
* @size: number of bytes to copy
@@ -288,6 +291,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
*/
__attribute__((nonnull))
static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
+ struct drm_sched_entity *entity,
const struct amdgpu_copy_mem *src,
const struct amdgpu_copy_mem *dst,
uint64_t size, bool tmz,
@@ -320,12 +324,14 @@ static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
cur_size = min3(src_mm.size, dst_mm.size, 256ULL << 20);
/* Map src to window 0 and dst to window 1. */
- r = amdgpu_ttm_map_buffer(src->bo, src->mem, &src_mm,
+ r = amdgpu_ttm_map_buffer(entity,
+ src->bo, src->mem, &src_mm,
0, ring, tmz, &cur_size, &from);
if (r)
goto error;
- r = amdgpu_ttm_map_buffer(dst->bo, dst->mem, &dst_mm,
+ r = amdgpu_ttm_map_buffer(entity,
+ dst->bo, dst->mem, &dst_mm,
1, ring, tmz, &cur_size, &to);
if (r)
goto error;
@@ -353,7 +359,7 @@ static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
write_compress_disable));
}
- r = amdgpu_copy_buffer(ring, from, to, cur_size, resv,
+ r = amdgpu_copy_buffer(ring, entity, from, to, cur_size, resv,
&next, true, copy_flags);
if (r)
goto error;
@@ -394,7 +400,9 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
src.offset = 0;
dst.offset = 0;
- r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst,
+ r = amdgpu_ttm_copy_mem_to_mem(adev,
+ &adev->mman.move_entity.base,
+ &src, &dst,
new_mem->size,
amdgpu_bo_encrypted(abo),
bo->base.resv, &fence);
@@ -406,8 +414,9 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
struct dma_fence *wipe_fence = NULL;
- r = amdgpu_fill_buffer(abo, 0, NULL, &wipe_fence,
- false, AMDGPU_KERNEL_JOB_ID_MOVE_BLIT);
+ r = amdgpu_fill_buffer(&adev->mman.move_entity,
+ abo, 0, NULL, &wipe_fence,
+ AMDGPU_KERNEL_JOB_ID_MOVE_BLIT);
if (r) {
goto error;
} else if (wipe_fence) {
@@ -2223,16 +2232,15 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
}
static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
+ struct drm_sched_entity *entity,
unsigned int num_dw,
struct dma_resv *resv,
bool vm_needs_flush,
struct amdgpu_job **job,
- bool delayed, u64 k_job_id)
+ u64 k_job_id)
{
enum amdgpu_ib_pool_type pool = AMDGPU_IB_POOL_DELAYED;
int r;
- struct drm_sched_entity *entity = delayed ? &adev->mman.clear_entity.base :
- &adev->mman.move_entity.base;
r = amdgpu_job_alloc_with_ib(adev, entity,
AMDGPU_FENCE_OWNER_UNDEFINED,
num_dw * 4, pool, job, k_job_id);
@@ -2252,7 +2260,9 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
DMA_RESV_USAGE_BOOKKEEP);
}
-int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
+int amdgpu_copy_buffer(struct amdgpu_ring *ring,
+ struct drm_sched_entity *entity,
+ uint64_t src_offset,
uint64_t dst_offset, uint32_t byte_count,
struct dma_resv *resv,
struct dma_fence **fence,
@@ -2274,8 +2284,8 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
max_bytes = adev->mman.buffer_funcs->copy_max_bytes;
num_loops = DIV_ROUND_UP(byte_count, max_bytes);
num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8);
- r = amdgpu_ttm_prepare_job(adev, num_dw,
- resv, vm_needs_flush, &job, false,
+ r = amdgpu_ttm_prepare_job(adev, entity, num_dw,
+ resv, vm_needs_flush, &job,
AMDGPU_KERNEL_JOB_ID_TTM_COPY_BUFFER);
if (r)
return r;
@@ -2304,11 +2314,13 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
return r;
}
-static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
+static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring,
+ struct drm_sched_entity *entity,
+ uint32_t src_data,
uint64_t dst_addr, uint32_t byte_count,
struct dma_resv *resv,
struct dma_fence **fence,
- bool vm_needs_flush, bool delayed,
+ bool vm_needs_flush,
u64 k_job_id)
{
struct amdgpu_device *adev = ring->adev;
@@ -2321,8 +2333,8 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
max_bytes = adev->mman.buffer_funcs->fill_max_bytes;
num_loops = DIV_ROUND_UP_ULL(byte_count, max_bytes);
num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->fill_num_dw, 8);
- r = amdgpu_ttm_prepare_job(adev, num_dw, resv, vm_needs_flush,
- &job, delayed, k_job_id);
+ r = amdgpu_ttm_prepare_job(adev, entity, num_dw, resv,
+ vm_needs_flush, &job, k_job_id);
if (r)
return r;
@@ -2386,13 +2398,14 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
/* Never clear more than 256MiB at once to avoid timeouts */
size = min(cursor.size, 256ULL << 20);
- r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &cursor,
+ r = amdgpu_ttm_map_buffer(&adev->mman.clear_entity.base,
+ &bo->tbo, bo->tbo.resource, &cursor,
1, ring, false, &size, &addr);
if (r)
goto err;
- r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv,
- &next, true, true,
+ r = amdgpu_ttm_fill_mem(ring, &adev->mman.clear_entity.base, 0, addr, size, resv,
+ &next, true,
AMDGPU_KERNEL_JOB_ID_TTM_CLEAR_BUFFER);
if (r)
goto err;
@@ -2408,12 +2421,12 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
return r;
}
-int amdgpu_fill_buffer(struct amdgpu_bo *bo,
- uint32_t src_data,
- struct dma_resv *resv,
- struct dma_fence **f,
- bool delayed,
- u64 k_job_id)
+int amdgpu_fill_buffer(struct amdgpu_ttm_entity *entity,
+ struct amdgpu_bo *bo,
+ uint32_t src_data,
+ struct dma_resv *resv,
+ struct dma_fence **f,
+ u64 k_job_id)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
@@ -2437,13 +2450,15 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
/* Never fill more than 256MiB at once to avoid timeouts */
cur_size = min(dst.size, 256ULL << 20);
- r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &dst,
+ r = amdgpu_ttm_map_buffer(&entity->base,
+ &bo->tbo, bo->tbo.resource, &dst,
1, ring, false, &cur_size, &to);
if (r)
goto error;
- r = amdgpu_ttm_fill_mem(ring, src_data, to, cur_size, resv,
- &next, true, delayed, k_job_id);
+ r = amdgpu_ttm_fill_mem(ring, &entity->base,
+ src_data, to, cur_size, resv,
+ &next, true, k_job_id);
if (r)
goto error;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index d2295d6c2b67..e1655f86a016 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -167,7 +167,9 @@ int amdgpu_ttm_init(struct amdgpu_device *adev);
void amdgpu_ttm_fini(struct amdgpu_device *adev);
void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev,
bool enable);
-int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
+int amdgpu_copy_buffer(struct amdgpu_ring *ring,
+ struct drm_sched_entity *entity,
+ uint64_t src_offset,
uint64_t dst_offset, uint32_t byte_count,
struct dma_resv *resv,
struct dma_fence **fence,
@@ -175,12 +177,12 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
struct dma_resv *resv,
struct dma_fence **fence);
-int amdgpu_fill_buffer(struct amdgpu_bo *bo,
- uint32_t src_data,
- struct dma_resv *resv,
- struct dma_fence **fence,
- bool delayed,
- u64 k_job_id);
+int amdgpu_fill_buffer(struct amdgpu_ttm_entity *entity,
+ struct amdgpu_bo *bo,
+ uint32_t src_data,
+ struct dma_resv *resv,
+ struct dma_fence **f,
+ u64 k_job_id);
int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index d74ff6e90590..09756132fa1b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -157,7 +157,8 @@ svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys,
goto out_unlock;
}
- r = amdgpu_copy_buffer(ring, gart_s, gart_d, size * PAGE_SIZE,
+ r = amdgpu_copy_buffer(ring, &entity->base,
+ gart_s, gart_d, size * PAGE_SIZE,
NULL, &next, true, 0);
if (r) {
dev_err(adev->dev, "fail %d to copy memory\n", r);
--
2.43.0
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: [PATCH v2 05/20] drm/amdgpu: pass the entity to use to ttm functions
2025-11-13 16:05 ` [PATCH v2 05/20] drm/amdgpu: pass the entity to use to ttm functions Pierre-Eric Pelloux-Prayer
@ 2025-11-14 13:07 ` Christian König
2025-11-14 14:41 ` Pierre-Eric Pelloux-Prayer
2025-11-14 20:20 ` Felix Kuehling
1 sibling, 1 reply; 20+ messages in thread
From: Christian König @ 2025-11-14 13:07 UTC (permalink / raw)
To: Pierre-Eric Pelloux-Prayer, Alex Deucher, David Airlie,
Simona Vetter, Felix Kuehling, Sumit Semwal
Cc: amd-gfx, dri-devel, linux-kernel, linux-media, linaro-mm-sig
On 11/13/25 17:05, Pierre-Eric Pelloux-Prayer wrote:
> This way the caller can select the one it wants to use.
>
> Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c | 3 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 4 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 75 +++++++++++--------
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 16 ++--
> drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 3 +-
> 5 files changed, 60 insertions(+), 41 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
> index 02c2479a8840..b59040a8771f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
> @@ -38,7 +38,8 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
> stime = ktime_get();
> for (i = 0; i < n; i++) {
> struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
> - r = amdgpu_copy_buffer(ring, saddr, daddr, size, NULL, &fence,
> + r = amdgpu_copy_buffer(ring, &adev->mman.default_entity.base,
> + saddr, daddr, size, NULL, &fence,
> false, 0);
> if (r)
> goto exit_do_move;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index e08f58de4b17..c06c132a753c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -1321,8 +1321,8 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
> if (r)
> goto out;
>
> - r = amdgpu_fill_buffer(abo, 0, &bo->base._resv, &fence, true,
> - AMDGPU_KERNEL_JOB_ID_CLEAR_ON_RELEASE);
> + r = amdgpu_fill_buffer(&adev->mman.clear_entity, abo, 0, &bo->base._resv,
> + &fence, AMDGPU_KERNEL_JOB_ID_CLEAR_ON_RELEASE);
> if (WARN_ON(r))
> goto out;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 42d448cd6a6d..c8d59ca2b3bd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -164,6 +164,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
>
> /**
> * amdgpu_ttm_map_buffer - Map memory into the GART windows
> + * @entity: entity to run the window setup job
> * @bo: buffer object to map
> * @mem: memory object to map
> * @mm_cur: range to map
> @@ -176,7 +177,8 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
> * Setup one of the GART windows to access a specific piece of memory or return
> * the physical address for local memory.
> */
> -static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
> +static int amdgpu_ttm_map_buffer(struct drm_sched_entity *entity,
> + struct ttm_buffer_object *bo,
Probably better to split this patch into multiple patches.
One which changes amdgpu_ttm_map_buffer() and then another one or two for the higher level copy_buffer and fill_buffer functions.
> struct ttm_resource *mem,
> struct amdgpu_res_cursor *mm_cur,
> unsigned int window, struct amdgpu_ring *ring,
> @@ -224,7 +226,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
> num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
> num_bytes = num_pages * 8 * AMDGPU_GPU_PAGES_IN_CPU_PAGE;
>
> - r = amdgpu_job_alloc_with_ib(adev, &adev->mman.default_entity.base,
> + r = amdgpu_job_alloc_with_ib(adev, entity,
> AMDGPU_FENCE_OWNER_UNDEFINED,
> num_dw * 4 + num_bytes,
> AMDGPU_IB_POOL_DELAYED, &job,
> @@ -274,6 +276,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
> /**
> * amdgpu_ttm_copy_mem_to_mem - Helper function for copy
> * @adev: amdgpu device
> + * @entity: entity to run the jobs
> * @src: buffer/address where to read from
> * @dst: buffer/address where to write to
> * @size: number of bytes to copy
> @@ -288,6 +291,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
> */
> __attribute__((nonnull))
> static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
> + struct drm_sched_entity *entity,
> const struct amdgpu_copy_mem *src,
> const struct amdgpu_copy_mem *dst,
> uint64_t size, bool tmz,
> @@ -320,12 +324,14 @@ static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
> cur_size = min3(src_mm.size, dst_mm.size, 256ULL << 20);
>
> /* Map src to window 0 and dst to window 1. */
> - r = amdgpu_ttm_map_buffer(src->bo, src->mem, &src_mm,
> + r = amdgpu_ttm_map_buffer(entity,
> + src->bo, src->mem, &src_mm,
> 0, ring, tmz, &cur_size, &from);
> if (r)
> goto error;
>
> - r = amdgpu_ttm_map_buffer(dst->bo, dst->mem, &dst_mm,
> + r = amdgpu_ttm_map_buffer(entity,
> + dst->bo, dst->mem, &dst_mm,
> 1, ring, tmz, &cur_size, &to);
> if (r)
> goto error;
> @@ -353,7 +359,7 @@ static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
> write_compress_disable));
> }
>
> - r = amdgpu_copy_buffer(ring, from, to, cur_size, resv,
> + r = amdgpu_copy_buffer(ring, entity, from, to, cur_size, resv,
> &next, true, copy_flags);
> if (r)
> goto error;
> @@ -394,7 +400,9 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
> src.offset = 0;
> dst.offset = 0;
>
> - r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst,
> + r = amdgpu_ttm_copy_mem_to_mem(adev,
> + &adev->mman.move_entity.base,
> + &src, &dst,
> new_mem->size,
> amdgpu_bo_encrypted(abo),
> bo->base.resv, &fence);
> @@ -406,8 +414,9 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
> (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
> struct dma_fence *wipe_fence = NULL;
>
> - r = amdgpu_fill_buffer(abo, 0, NULL, &wipe_fence,
> - false, AMDGPU_KERNEL_JOB_ID_MOVE_BLIT);
> + r = amdgpu_fill_buffer(&adev->mman.move_entity,
> + abo, 0, NULL, &wipe_fence,
> + AMDGPU_KERNEL_JOB_ID_MOVE_BLIT);
> if (r) {
> goto error;
> } else if (wipe_fence) {
> @@ -2223,16 +2232,15 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
> }
>
> static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
> + struct drm_sched_entity *entity,
> unsigned int num_dw,
> struct dma_resv *resv,
> bool vm_needs_flush,
> struct amdgpu_job **job,
> - bool delayed, u64 k_job_id)
> + u64 k_job_id)
> {
> enum amdgpu_ib_pool_type pool = AMDGPU_IB_POOL_DELAYED;
> int r;
> - struct drm_sched_entity *entity = delayed ? &adev->mman.clear_entity.base :
> - &adev->mman.move_entity.base;
> r = amdgpu_job_alloc_with_ib(adev, entity,
> AMDGPU_FENCE_OWNER_UNDEFINED,
> num_dw * 4, pool, job, k_job_id);
> @@ -2252,7 +2260,9 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
> DMA_RESV_USAGE_BOOKKEEP);
> }
>
> -int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
> +int amdgpu_copy_buffer(struct amdgpu_ring *ring,
> + struct drm_sched_entity *entity,
> + uint64_t src_offset,
> uint64_t dst_offset, uint32_t byte_count,
> struct dma_resv *resv,
> struct dma_fence **fence,
> @@ -2274,8 +2284,8 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
> max_bytes = adev->mman.buffer_funcs->copy_max_bytes;
> num_loops = DIV_ROUND_UP(byte_count, max_bytes);
> num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8);
> - r = amdgpu_ttm_prepare_job(adev, num_dw,
> - resv, vm_needs_flush, &job, false,
> + r = amdgpu_ttm_prepare_job(adev, entity, num_dw,
> + resv, vm_needs_flush, &job,
> AMDGPU_KERNEL_JOB_ID_TTM_COPY_BUFFER);
> if (r)
> return r;
> @@ -2304,11 +2314,13 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
> return r;
> }
>
> -static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
> +static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring,
> + struct drm_sched_entity *entity,
> + uint32_t src_data,
> uint64_t dst_addr, uint32_t byte_count,
> struct dma_resv *resv,
> struct dma_fence **fence,
> - bool vm_needs_flush, bool delayed,
> + bool vm_needs_flush,
> u64 k_job_id)
> {
> struct amdgpu_device *adev = ring->adev;
> @@ -2321,8 +2333,8 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
> max_bytes = adev->mman.buffer_funcs->fill_max_bytes;
> num_loops = DIV_ROUND_UP_ULL(byte_count, max_bytes);
> num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->fill_num_dw, 8);
> - r = amdgpu_ttm_prepare_job(adev, num_dw, resv, vm_needs_flush,
> - &job, delayed, k_job_id);
> + r = amdgpu_ttm_prepare_job(adev, entity, num_dw, resv,
> + vm_needs_flush, &job, k_job_id);
> if (r)
> return r;
>
> @@ -2386,13 +2398,14 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
> /* Never clear more than 256MiB at once to avoid timeouts */
> size = min(cursor.size, 256ULL << 20);
>
> - r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &cursor,
> + r = amdgpu_ttm_map_buffer(&adev->mman.clear_entity.base,
> + &bo->tbo, bo->tbo.resource, &cursor,
> 1, ring, false, &size, &addr);
> if (r)
> goto err;
>
> - r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv,
> - &next, true, true,
> + r = amdgpu_ttm_fill_mem(ring, &adev->mman.clear_entity.base, 0, addr, size, resv,
> + &next, true,
> AMDGPU_KERNEL_JOB_ID_TTM_CLEAR_BUFFER);
> if (r)
> goto err;
> @@ -2408,12 +2421,12 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
> return r;
> }
>
> -int amdgpu_fill_buffer(struct amdgpu_bo *bo,
> - uint32_t src_data,
> - struct dma_resv *resv,
> - struct dma_fence **f,
> - bool delayed,
> - u64 k_job_id)
> +int amdgpu_fill_buffer(struct amdgpu_ttm_entity *entity,
> + struct amdgpu_bo *bo,
> + uint32_t src_data,
> + struct dma_resv *resv,
> + struct dma_fence **f,
> + u64 k_job_id)
> {
> struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
> struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
> @@ -2437,13 +2450,15 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
> /* Never fill more than 256MiB at once to avoid timeouts */
> cur_size = min(dst.size, 256ULL << 20);
>
> - r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &dst,
> + r = amdgpu_ttm_map_buffer(&entity->base,
> + &bo->tbo, bo->tbo.resource, &dst,
> 1, ring, false, &cur_size, &to);
> if (r)
> goto error;
>
> - r = amdgpu_ttm_fill_mem(ring, src_data, to, cur_size, resv,
> - &next, true, delayed, k_job_id);
> + r = amdgpu_ttm_fill_mem(ring, &entity->base,
> + src_data, to, cur_size, resv,
> + &next, true, k_job_id);
> if (r)
> goto error;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> index d2295d6c2b67..e1655f86a016 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> @@ -167,7 +167,9 @@ int amdgpu_ttm_init(struct amdgpu_device *adev);
> void amdgpu_ttm_fini(struct amdgpu_device *adev);
> void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev,
> bool enable);
> -int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
> +int amdgpu_copy_buffer(struct amdgpu_ring *ring,
> + struct drm_sched_entity *entity,
If I'm not completely mistaken you should be able to drop the ring argument since that can be determined from the entity.
Apart from that looks rather good to me.
Regards,
Christian.
> + uint64_t src_offset,
> uint64_t dst_offset, uint32_t byte_count,
> struct dma_resv *resv,
> struct dma_fence **fence,
> @@ -175,12 +177,12 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
> int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
> struct dma_resv *resv,
> struct dma_fence **fence);
> -int amdgpu_fill_buffer(struct amdgpu_bo *bo,
> - uint32_t src_data,
> - struct dma_resv *resv,
> - struct dma_fence **fence,
> - bool delayed,
> - u64 k_job_id);
> +int amdgpu_fill_buffer(struct amdgpu_ttm_entity *entity,
> + struct amdgpu_bo *bo,
> + uint32_t src_data,
> + struct dma_resv *resv,
> + struct dma_fence **f,
> + u64 k_job_id);
>
> int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
> void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> index d74ff6e90590..09756132fa1b 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> @@ -157,7 +157,8 @@ svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys,
> goto out_unlock;
> }
>
> - r = amdgpu_copy_buffer(ring, gart_s, gart_d, size * PAGE_SIZE,
> + r = amdgpu_copy_buffer(ring, &entity->base,
> + gart_s, gart_d, size * PAGE_SIZE,
> NULL, &next, true, 0);
> if (r) {
> dev_err(adev->dev, "fail %d to copy memory\n", r);
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH v2 05/20] drm/amdgpu: pass the entity to use to ttm functions
2025-11-14 13:07 ` Christian König
@ 2025-11-14 14:41 ` Pierre-Eric Pelloux-Prayer
2025-11-17 9:41 ` Pierre-Eric Pelloux-Prayer
0 siblings, 1 reply; 20+ messages in thread
From: Pierre-Eric Pelloux-Prayer @ 2025-11-14 14:41 UTC (permalink / raw)
To: Christian König, Pierre-Eric Pelloux-Prayer, Alex Deucher,
David Airlie, Simona Vetter, Felix Kuehling, Sumit Semwal
Cc: amd-gfx, dri-devel, linux-kernel, linux-media, linaro-mm-sig
Le 14/11/2025 à 14:07, Christian König a écrit :
> On 11/13/25 17:05, Pierre-Eric Pelloux-Prayer wrote:
>> This way the caller can select the one it wants to use.
>>
>> Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c | 3 +-
>> drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 4 +-
>> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 75 +++++++++++--------
>> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 16 ++--
>> drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 3 +-
>> 5 files changed, 60 insertions(+), 41 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
>> index 02c2479a8840..b59040a8771f 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
>> @@ -38,7 +38,8 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
>> stime = ktime_get();
>> for (i = 0; i < n; i++) {
>> struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
>> - r = amdgpu_copy_buffer(ring, saddr, daddr, size, NULL, &fence,
>> + r = amdgpu_copy_buffer(ring, &adev->mman.default_entity.base,
>> + saddr, daddr, size, NULL, &fence,
>> false, 0);
>> if (r)
>> goto exit_do_move;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> index e08f58de4b17..c06c132a753c 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> @@ -1321,8 +1321,8 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
>> if (r)
>> goto out;
>>
>> - r = amdgpu_fill_buffer(abo, 0, &bo->base._resv, &fence, true,
>> - AMDGPU_KERNEL_JOB_ID_CLEAR_ON_RELEASE);
>> + r = amdgpu_fill_buffer(&adev->mman.clear_entity, abo, 0, &bo->base._resv,
>> + &fence, AMDGPU_KERNEL_JOB_ID_CLEAR_ON_RELEASE);
>> if (WARN_ON(r))
>> goto out;
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> index 42d448cd6a6d..c8d59ca2b3bd 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> @@ -164,6 +164,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
>>
>> /**
>> * amdgpu_ttm_map_buffer - Map memory into the GART windows
>> + * @entity: entity to run the window setup job
>> * @bo: buffer object to map
>> * @mem: memory object to map
>> * @mm_cur: range to map
>> @@ -176,7 +177,8 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
>> * Setup one of the GART windows to access a specific piece of memory or return
>> * the physical address for local memory.
>> */
>> -static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
>> +static int amdgpu_ttm_map_buffer(struct drm_sched_entity *entity,
>> + struct ttm_buffer_object *bo,
>
>
> Probably better to split this patch into multiple patches.
>
> One which changes amdgpu_ttm_map_buffer() and then another one or two for the higher level copy_buffer and fill_buffer functions.
OK.
>
>> struct ttm_resource *mem,
>> struct amdgpu_res_cursor *mm_cur,
>> unsigned int window, struct amdgpu_ring *ring,
>> @@ -224,7 +226,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
>> num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
>> num_bytes = num_pages * 8 * AMDGPU_GPU_PAGES_IN_CPU_PAGE;
>>
>> - r = amdgpu_job_alloc_with_ib(adev, &adev->mman.default_entity.base,
>> + r = amdgpu_job_alloc_with_ib(adev, entity,
>> AMDGPU_FENCE_OWNER_UNDEFINED,
>> num_dw * 4 + num_bytes,
>> AMDGPU_IB_POOL_DELAYED, &job,
>> @@ -274,6 +276,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
>> /**
>> * amdgpu_ttm_copy_mem_to_mem - Helper function for copy
>> * @adev: amdgpu device
>> + * @entity: entity to run the jobs
>> * @src: buffer/address where to read from
>> * @dst: buffer/address where to write to
>> * @size: number of bytes to copy
>> @@ -288,6 +291,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
>> */
>> __attribute__((nonnull))
>> static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
>> + struct drm_sched_entity *entity,
>> const struct amdgpu_copy_mem *src,
>> const struct amdgpu_copy_mem *dst,
>> uint64_t size, bool tmz,
>> @@ -320,12 +324,14 @@ static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
>> cur_size = min3(src_mm.size, dst_mm.size, 256ULL << 20);
>>
>> /* Map src to window 0 and dst to window 1. */
>> - r = amdgpu_ttm_map_buffer(src->bo, src->mem, &src_mm,
>> + r = amdgpu_ttm_map_buffer(entity,
>> + src->bo, src->mem, &src_mm,
>> 0, ring, tmz, &cur_size, &from);
>> if (r)
>> goto error;
>>
>> - r = amdgpu_ttm_map_buffer(dst->bo, dst->mem, &dst_mm,
>> + r = amdgpu_ttm_map_buffer(entity,
>> + dst->bo, dst->mem, &dst_mm,
>> 1, ring, tmz, &cur_size, &to);
>> if (r)
>> goto error;
>> @@ -353,7 +359,7 @@ static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
>> write_compress_disable));
>> }
>>
>> - r = amdgpu_copy_buffer(ring, from, to, cur_size, resv,
>> + r = amdgpu_copy_buffer(ring, entity, from, to, cur_size, resv,
>> &next, true, copy_flags);
>> if (r)
>> goto error;
>> @@ -394,7 +400,9 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
>> src.offset = 0;
>> dst.offset = 0;
>>
>> - r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst,
>> + r = amdgpu_ttm_copy_mem_to_mem(adev,
>> + &adev->mman.move_entity.base,
>> + &src, &dst,
>> new_mem->size,
>> amdgpu_bo_encrypted(abo),
>> bo->base.resv, &fence);
>> @@ -406,8 +414,9 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
>> (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
>> struct dma_fence *wipe_fence = NULL;
>>
>> - r = amdgpu_fill_buffer(abo, 0, NULL, &wipe_fence,
>> - false, AMDGPU_KERNEL_JOB_ID_MOVE_BLIT);
>> + r = amdgpu_fill_buffer(&adev->mman.move_entity,
>> + abo, 0, NULL, &wipe_fence,
>> + AMDGPU_KERNEL_JOB_ID_MOVE_BLIT);
>> if (r) {
>> goto error;
>> } else if (wipe_fence) {
>> @@ -2223,16 +2232,15 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
>> }
>>
>> static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
>> + struct drm_sched_entity *entity,
>> unsigned int num_dw,
>> struct dma_resv *resv,
>> bool vm_needs_flush,
>> struct amdgpu_job **job,
>> - bool delayed, u64 k_job_id)
>> + u64 k_job_id)
>> {
>> enum amdgpu_ib_pool_type pool = AMDGPU_IB_POOL_DELAYED;
>> int r;
>> - struct drm_sched_entity *entity = delayed ? &adev->mman.clear_entity.base :
>> - &adev->mman.move_entity.base;
>> r = amdgpu_job_alloc_with_ib(adev, entity,
>> AMDGPU_FENCE_OWNER_UNDEFINED,
>> num_dw * 4, pool, job, k_job_id);
>> @@ -2252,7 +2260,9 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
>> DMA_RESV_USAGE_BOOKKEEP);
>> }
>>
>> -int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
>> +int amdgpu_copy_buffer(struct amdgpu_ring *ring,
>> + struct drm_sched_entity *entity,
>> + uint64_t src_offset,
>> uint64_t dst_offset, uint32_t byte_count,
>> struct dma_resv *resv,
>> struct dma_fence **fence,
>> @@ -2274,8 +2284,8 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
>> max_bytes = adev->mman.buffer_funcs->copy_max_bytes;
>> num_loops = DIV_ROUND_UP(byte_count, max_bytes);
>> num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8);
>> - r = amdgpu_ttm_prepare_job(adev, num_dw,
>> - resv, vm_needs_flush, &job, false,
>> + r = amdgpu_ttm_prepare_job(adev, entity, num_dw,
>> + resv, vm_needs_flush, &job,
>> AMDGPU_KERNEL_JOB_ID_TTM_COPY_BUFFER);
>> if (r)
>> return r;
>> @@ -2304,11 +2314,13 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
>> return r;
>> }
>>
>> -static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
>> +static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring,
>> + struct drm_sched_entity *entity,
>> + uint32_t src_data,
>> uint64_t dst_addr, uint32_t byte_count,
>> struct dma_resv *resv,
>> struct dma_fence **fence,
>> - bool vm_needs_flush, bool delayed,
>> + bool vm_needs_flush,
>> u64 k_job_id)
>> {
>> struct amdgpu_device *adev = ring->adev;
>> @@ -2321,8 +2333,8 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
>> max_bytes = adev->mman.buffer_funcs->fill_max_bytes;
>> num_loops = DIV_ROUND_UP_ULL(byte_count, max_bytes);
>> num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->fill_num_dw, 8);
>> - r = amdgpu_ttm_prepare_job(adev, num_dw, resv, vm_needs_flush,
>> - &job, delayed, k_job_id);
>> + r = amdgpu_ttm_prepare_job(adev, entity, num_dw, resv,
>> + vm_needs_flush, &job, k_job_id);
>> if (r)
>> return r;
>>
>> @@ -2386,13 +2398,14 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
>> /* Never clear more than 256MiB at once to avoid timeouts */
>> size = min(cursor.size, 256ULL << 20);
>>
>> - r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &cursor,
>> + r = amdgpu_ttm_map_buffer(&adev->mman.clear_entity.base,
>> + &bo->tbo, bo->tbo.resource, &cursor,
>> 1, ring, false, &size, &addr);
>> if (r)
>> goto err;
>>
>> - r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv,
>> - &next, true, true,
>> + r = amdgpu_ttm_fill_mem(ring, &adev->mman.clear_entity.base, 0, addr, size, resv,
>> + &next, true,
>> AMDGPU_KERNEL_JOB_ID_TTM_CLEAR_BUFFER);
>> if (r)
>> goto err;
>> @@ -2408,12 +2421,12 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
>> return r;
>> }
>>
>> -int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>> - uint32_t src_data,
>> - struct dma_resv *resv,
>> - struct dma_fence **f,
>> - bool delayed,
>> - u64 k_job_id)
>> +int amdgpu_fill_buffer(struct amdgpu_ttm_entity *entity,
>> + struct amdgpu_bo *bo,
>> + uint32_t src_data,
>> + struct dma_resv *resv,
>> + struct dma_fence **f,
>> + u64 k_job_id)
>> {
>> struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
>> struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
>> @@ -2437,13 +2450,15 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>> /* Never fill more than 256MiB at once to avoid timeouts */
>> cur_size = min(dst.size, 256ULL << 20);
>>
>> - r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &dst,
>> + r = amdgpu_ttm_map_buffer(&entity->base,
>> + &bo->tbo, bo->tbo.resource, &dst,
>> 1, ring, false, &cur_size, &to);
>> if (r)
>> goto error;
>>
>> - r = amdgpu_ttm_fill_mem(ring, src_data, to, cur_size, resv,
>> - &next, true, delayed, k_job_id);
>> + r = amdgpu_ttm_fill_mem(ring, &entity->base,
>> + src_data, to, cur_size, resv,
>> + &next, true, k_job_id);
>> if (r)
>> goto error;
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> index d2295d6c2b67..e1655f86a016 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> @@ -167,7 +167,9 @@ int amdgpu_ttm_init(struct amdgpu_device *adev);
>> void amdgpu_ttm_fini(struct amdgpu_device *adev);
>> void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev,
>> bool enable);
>> -int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
>> +int amdgpu_copy_buffer(struct amdgpu_ring *ring,
>> + struct drm_sched_entity *entity,
>
> If I'm not completely mistaken you should be able to drop the ring argument since that can be determined from the entity.
OK will do.
Pierre-Eric
>
> Apart from that looks rather good to me.
>
> Regards,
> Christian.
>
>> + uint64_t src_offset,
>> uint64_t dst_offset, uint32_t byte_count,
>> struct dma_resv *resv,
>> struct dma_fence **fence,
>> @@ -175,12 +177,12 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
>> int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
>> struct dma_resv *resv,
>> struct dma_fence **fence);
>> -int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>> - uint32_t src_data,
>> - struct dma_resv *resv,
>> - struct dma_fence **fence,
>> - bool delayed,
>> - u64 k_job_id);
>> +int amdgpu_fill_buffer(struct amdgpu_ttm_entity *entity,
>> + struct amdgpu_bo *bo,
>> + uint32_t src_data,
>> + struct dma_resv *resv,
>> + struct dma_fence **f,
>> + u64 k_job_id);
>>
>> int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
>> void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
>> index d74ff6e90590..09756132fa1b 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
>> @@ -157,7 +157,8 @@ svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys,
>> goto out_unlock;
>> }
>>
>> - r = amdgpu_copy_buffer(ring, gart_s, gart_d, size * PAGE_SIZE,
>> + r = amdgpu_copy_buffer(ring, &entity->base,
>> + gart_s, gart_d, size * PAGE_SIZE,
>> NULL, &next, true, 0);
>> if (r) {
>> dev_err(adev->dev, "fail %d to copy memory\n", r);
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH v2 05/20] drm/amdgpu: pass the entity to use to ttm functions
2025-11-14 14:41 ` Pierre-Eric Pelloux-Prayer
@ 2025-11-17 9:41 ` Pierre-Eric Pelloux-Prayer
0 siblings, 0 replies; 20+ messages in thread
From: Pierre-Eric Pelloux-Prayer @ 2025-11-17 9:41 UTC (permalink / raw)
To: Christian König, Pierre-Eric Pelloux-Prayer, Alex Deucher,
David Airlie, Simona Vetter, Felix Kuehling, Sumit Semwal
Cc: amd-gfx, dri-devel, linux-kernel, linux-media, linaro-mm-sig
Le 14/11/2025 à 15:41, Pierre-Eric Pelloux-Prayer a écrit :
>
>
> Le 14/11/2025 à 14:07, Christian König a écrit :
>> On 11/13/25 17:05, Pierre-Eric Pelloux-Prayer wrote:
>>> This way the caller can select the one it wants to use.
>>>
>>> Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c | 3 +-
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 4 +-
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 75 +++++++++++--------
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 16 ++--
>>> drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 3 +-
>>> 5 files changed, 60 insertions(+), 41 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/
>>> amd/amdgpu/amdgpu_benchmark.c
>>> index 02c2479a8840..b59040a8771f 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
>>> @@ -38,7 +38,8 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device
>>> *adev, unsigned size,
>>> stime = ktime_get();
>>> for (i = 0; i < n; i++) {
>>> struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
>>> - r = amdgpu_copy_buffer(ring, saddr, daddr, size, NULL, &fence,
>>> + r = amdgpu_copy_buffer(ring, &adev->mman.default_entity.base,
>>> + saddr, daddr, size, NULL, &fence,
>>> false, 0);
>>> if (r)
>>> goto exit_do_move;
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/
>>> amd/amdgpu/amdgpu_object.c
>>> index e08f58de4b17..c06c132a753c 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>> @@ -1321,8 +1321,8 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object
>>> *bo)
>>> if (r)
>>> goto out;
>>> - r = amdgpu_fill_buffer(abo, 0, &bo->base._resv, &fence, true,
>>> - AMDGPU_KERNEL_JOB_ID_CLEAR_ON_RELEASE);
>>> + r = amdgpu_fill_buffer(&adev->mman.clear_entity, abo, 0, &bo->base._resv,
>>> + &fence, AMDGPU_KERNEL_JOB_ID_CLEAR_ON_RELEASE);
>>> if (WARN_ON(r))
>>> goto out;
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/
>>> amdgpu/amdgpu_ttm.c
>>> index 42d448cd6a6d..c8d59ca2b3bd 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> @@ -164,6 +164,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
>>> /**
>>> * amdgpu_ttm_map_buffer - Map memory into the GART windows
>>> + * @entity: entity to run the window setup job
>>> * @bo: buffer object to map
>>> * @mem: memory object to map
>>> * @mm_cur: range to map
>>> @@ -176,7 +177,8 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
>>> * Setup one of the GART windows to access a specific piece of memory or
>>> return
>>> * the physical address for local memory.
>>> */
>>> -static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
>>> +static int amdgpu_ttm_map_buffer(struct drm_sched_entity *entity,
>>> + struct ttm_buffer_object *bo,
>>
>>
>> Probably better to split this patch into multiple patches.
>>
>> One which changes amdgpu_ttm_map_buffer() and then another one or two for the
>> higher level copy_buffer and fill_buffer functions.
>
> OK.
>
>>
>>> struct ttm_resource *mem,
>>> struct amdgpu_res_cursor *mm_cur,
>>> unsigned int window, struct amdgpu_ring *ring,
>>> @@ -224,7 +226,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object
>>> *bo,
>>> num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
>>> num_bytes = num_pages * 8 * AMDGPU_GPU_PAGES_IN_CPU_PAGE;
>>> - r = amdgpu_job_alloc_with_ib(adev, &adev->mman.default_entity.base,
>>> + r = amdgpu_job_alloc_with_ib(adev, entity,
>>> AMDGPU_FENCE_OWNER_UNDEFINED,
>>> num_dw * 4 + num_bytes,
>>> AMDGPU_IB_POOL_DELAYED, &job,
>>> @@ -274,6 +276,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object
>>> *bo,
>>> /**
>>> * amdgpu_ttm_copy_mem_to_mem - Helper function for copy
>>> * @adev: amdgpu device
>>> + * @entity: entity to run the jobs
>>> * @src: buffer/address where to read from
>>> * @dst: buffer/address where to write to
>>> * @size: number of bytes to copy
>>> @@ -288,6 +291,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object
>>> *bo,
>>> */
>>> __attribute__((nonnull))
>>> static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
>>> + struct drm_sched_entity *entity,
>>> const struct amdgpu_copy_mem *src,
>>> const struct amdgpu_copy_mem *dst,
>>> uint64_t size, bool tmz,
>>> @@ -320,12 +324,14 @@ static int amdgpu_ttm_copy_mem_to_mem(struct
>>> amdgpu_device *adev,
>>> cur_size = min3(src_mm.size, dst_mm.size, 256ULL << 20);
>>> /* Map src to window 0 and dst to window 1. */
>>> - r = amdgpu_ttm_map_buffer(src->bo, src->mem, &src_mm,
>>> + r = amdgpu_ttm_map_buffer(entity,
>>> + src->bo, src->mem, &src_mm,
>>> 0, ring, tmz, &cur_size, &from);
>>> if (r)
>>> goto error;
>>> - r = amdgpu_ttm_map_buffer(dst->bo, dst->mem, &dst_mm,
>>> + r = amdgpu_ttm_map_buffer(entity,
>>> + dst->bo, dst->mem, &dst_mm,
>>> 1, ring, tmz, &cur_size, &to);
>>> if (r)
>>> goto error;
>>> @@ -353,7 +359,7 @@ static int amdgpu_ttm_copy_mem_to_mem(struct
>>> amdgpu_device *adev,
>>> write_compress_disable));
>>> }
>>> - r = amdgpu_copy_buffer(ring, from, to, cur_size, resv,
>>> + r = amdgpu_copy_buffer(ring, entity, from, to, cur_size, resv,
>>> &next, true, copy_flags);
>>> if (r)
>>> goto error;
>>> @@ -394,7 +400,9 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
>>> src.offset = 0;
>>> dst.offset = 0;
>>> - r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst,
>>> + r = amdgpu_ttm_copy_mem_to_mem(adev,
>>> + &adev->mman.move_entity.base,
>>> + &src, &dst,
>>> new_mem->size,
>>> amdgpu_bo_encrypted(abo),
>>> bo->base.resv, &fence);
>>> @@ -406,8 +414,9 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
>>> (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
>>> struct dma_fence *wipe_fence = NULL;
>>> - r = amdgpu_fill_buffer(abo, 0, NULL, &wipe_fence,
>>> - false, AMDGPU_KERNEL_JOB_ID_MOVE_BLIT);
>>> + r = amdgpu_fill_buffer(&adev->mman.move_entity,
>>> + abo, 0, NULL, &wipe_fence,
>>> + AMDGPU_KERNEL_JOB_ID_MOVE_BLIT);
>>> if (r) {
>>> goto error;
>>> } else if (wipe_fence) {
>>> @@ -2223,16 +2232,15 @@ void amdgpu_ttm_set_buffer_funcs_status(struct
>>> amdgpu_device *adev, bool enable)
>>> }
>>> static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
>>> + struct drm_sched_entity *entity,
>>> unsigned int num_dw,
>>> struct dma_resv *resv,
>>> bool vm_needs_flush,
>>> struct amdgpu_job **job,
>>> - bool delayed, u64 k_job_id)
>>> + u64 k_job_id)
>>> {
>>> enum amdgpu_ib_pool_type pool = AMDGPU_IB_POOL_DELAYED;
>>> int r;
>>> - struct drm_sched_entity *entity = delayed ? &adev->mman.clear_entity.base :
>>> - &adev->mman.move_entity.base;
>>> r = amdgpu_job_alloc_with_ib(adev, entity,
>>> AMDGPU_FENCE_OWNER_UNDEFINED,
>>> num_dw * 4, pool, job, k_job_id);
>>> @@ -2252,7 +2260,9 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device
>>> *adev,
>>> DMA_RESV_USAGE_BOOKKEEP);
>>> }
>>> -int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
>>> +int amdgpu_copy_buffer(struct amdgpu_ring *ring,
>>> + struct drm_sched_entity *entity,
>>> + uint64_t src_offset,
>>> uint64_t dst_offset, uint32_t byte_count,
>>> struct dma_resv *resv,
>>> struct dma_fence **fence,
>>> @@ -2274,8 +2284,8 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
>>> uint64_t src_offset,
>>> max_bytes = adev->mman.buffer_funcs->copy_max_bytes;
>>> num_loops = DIV_ROUND_UP(byte_count, max_bytes);
>>> num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8);
>>> - r = amdgpu_ttm_prepare_job(adev, num_dw,
>>> - resv, vm_needs_flush, &job, false,
>>> + r = amdgpu_ttm_prepare_job(adev, entity, num_dw,
>>> + resv, vm_needs_flush, &job,
>>> AMDGPU_KERNEL_JOB_ID_TTM_COPY_BUFFER);
>>> if (r)
>>> return r;
>>> @@ -2304,11 +2314,13 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
>>> uint64_t src_offset,
>>> return r;
>>> }
>>> -static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
>>> +static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring,
>>> + struct drm_sched_entity *entity,
>>> + uint32_t src_data,
>>> uint64_t dst_addr, uint32_t byte_count,
>>> struct dma_resv *resv,
>>> struct dma_fence **fence,
>>> - bool vm_needs_flush, bool delayed,
>>> + bool vm_needs_flush,
>>> u64 k_job_id)
>>> {
>>> struct amdgpu_device *adev = ring->adev;
>>> @@ -2321,8 +2333,8 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring
>>> *ring, uint32_t src_data,
>>> max_bytes = adev->mman.buffer_funcs->fill_max_bytes;
>>> num_loops = DIV_ROUND_UP_ULL(byte_count, max_bytes);
>>> num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->fill_num_dw, 8);
>>> - r = amdgpu_ttm_prepare_job(adev, num_dw, resv, vm_needs_flush,
>>> - &job, delayed, k_job_id);
>>> + r = amdgpu_ttm_prepare_job(adev, entity, num_dw, resv,
>>> + vm_needs_flush, &job, k_job_id);
>>> if (r)
>>> return r;
>>> @@ -2386,13 +2398,14 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
>>> /* Never clear more than 256MiB at once to avoid timeouts */
>>> size = min(cursor.size, 256ULL << 20);
>>> - r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &cursor,
>>> + r = amdgpu_ttm_map_buffer(&adev->mman.clear_entity.base,
>>> + &bo->tbo, bo->tbo.resource, &cursor,
>>> 1, ring, false, &size, &addr);
>>> if (r)
>>> goto err;
>>> - r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv,
>>> - &next, true, true,
>>> + r = amdgpu_ttm_fill_mem(ring, &adev->mman.clear_entity.base, 0,
>>> addr, size, resv,
>>> + &next, true,
>>> AMDGPU_KERNEL_JOB_ID_TTM_CLEAR_BUFFER);
>>> if (r)
>>> goto err;
>>> @@ -2408,12 +2421,12 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
>>> return r;
>>> }
>>> -int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>>> - uint32_t src_data,
>>> - struct dma_resv *resv,
>>> - struct dma_fence **f,
>>> - bool delayed,
>>> - u64 k_job_id)
>>> +int amdgpu_fill_buffer(struct amdgpu_ttm_entity *entity,
>>> + struct amdgpu_bo *bo,
>>> + uint32_t src_data,
>>> + struct dma_resv *resv,
>>> + struct dma_fence **f,
>>> + u64 k_job_id)
>>> {
>>> struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
>>> struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
>>> @@ -2437,13 +2450,15 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>>> /* Never fill more than 256MiB at once to avoid timeouts */
>>> cur_size = min(dst.size, 256ULL << 20);
>>> - r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &dst,
>>> + r = amdgpu_ttm_map_buffer(&entity->base,
>>> + &bo->tbo, bo->tbo.resource, &dst,
>>> 1, ring, false, &cur_size, &to);
>>> if (r)
>>> goto error;
>>> - r = amdgpu_ttm_fill_mem(ring, src_data, to, cur_size, resv,
>>> - &next, true, delayed, k_job_id);
>>> + r = amdgpu_ttm_fill_mem(ring, &entity->base,
>>> + src_data, to, cur_size, resv,
>>> + &next, true, k_job_id);
>>> if (r)
>>> goto error;
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/
>>> amdgpu/amdgpu_ttm.h
>>> index d2295d6c2b67..e1655f86a016 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>> @@ -167,7 +167,9 @@ int amdgpu_ttm_init(struct amdgpu_device *adev);
>>> void amdgpu_ttm_fini(struct amdgpu_device *adev);
>>> void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev,
>>> bool enable);
>>> -int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
>>> +int amdgpu_copy_buffer(struct amdgpu_ring *ring,
>>> + struct drm_sched_entity *entity,
>>
>> If I'm not completely mistaken you should be able to drop the ring argument
>> since that can be determined from the entity.
>
> OK will do.
>
AFAIU the only way to get the ring from the entity is to get it from the
drm_gpu_scheduler pointer. This would require adding a new function:
struct drm_gpu_scheduler *
drm_sched_entity_get_scheduler(struct drm_sched_entity *entity) {
struct drm_gpu_scheduler *sched;
spin_lock(&entity->lock);
if (entity->rq)
sched = entity->rq->sched;
spin_unlock(&entity->lock);
return sched;
}
Alternatively, I can access the ring from the buffer_funcs_ring /
buffer_funcs_sched stored in amdgpu_mman.
What do you think?
Thanks,
Pierre-Eric
>
>
>>
>> Apart from that looks rather good to me.
>>
>> Regards,
>> Christian.
>>
>>> + uint64_t src_offset,
>>> uint64_t dst_offset, uint32_t byte_count,
>>> struct dma_resv *resv,
>>> struct dma_fence **fence,
>>> @@ -175,12 +177,12 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
>>> uint64_t src_offset,
>>> int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
>>> struct dma_resv *resv,
>>> struct dma_fence **fence);
>>> -int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>>> - uint32_t src_data,
>>> - struct dma_resv *resv,
>>> - struct dma_fence **fence,
>>> - bool delayed,
>>> - u64 k_job_id);
>>> +int amdgpu_fill_buffer(struct amdgpu_ttm_entity *entity,
>>> + struct amdgpu_bo *bo,
>>> + uint32_t src_data,
>>> + struct dma_resv *resv,
>>> + struct dma_fence **f,
>>> + u64 k_job_id);
>>> int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
>>> void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/
>>> amdkfd/kfd_migrate.c
>>> index d74ff6e90590..09756132fa1b 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
>>> @@ -157,7 +157,8 @@ svm_migrate_copy_memory_gart(struct amdgpu_device *adev,
>>> dma_addr_t *sys,
>>> goto out_unlock;
>>> }
>>> - r = amdgpu_copy_buffer(ring, gart_s, gart_d, size * PAGE_SIZE,
>>> + r = amdgpu_copy_buffer(ring, &entity->base,
>>> + gart_s, gart_d, size * PAGE_SIZE,
>>> NULL, &next, true, 0);
>>> if (r) {
>>> dev_err(adev->dev, "fail %d to copy memory\n", r);
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH v2 05/20] drm/amdgpu: pass the entity to use to ttm functions
2025-11-13 16:05 ` [PATCH v2 05/20] drm/amdgpu: pass the entity to use to ttm functions Pierre-Eric Pelloux-Prayer
2025-11-14 13:07 ` Christian König
@ 2025-11-14 20:20 ` Felix Kuehling
1 sibling, 0 replies; 20+ messages in thread
From: Felix Kuehling @ 2025-11-14 20:20 UTC (permalink / raw)
To: Pierre-Eric Pelloux-Prayer, Alex Deucher, Christian König,
David Airlie, Simona Vetter, Sumit Semwal
Cc: amd-gfx, dri-devel, linux-kernel, linux-media, linaro-mm-sig
On 2025-11-13 11:05, Pierre-Eric Pelloux-Prayer wrote:
> This way the caller can select the one it wants to use.
>
> Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
I agree with Christian's comment to eliminate the ring parameter where
it's implied by the entity. Other than that, the patch is
Acked-by: Felix Kuehling <felix.kuehling@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c | 3 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 4 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 75 +++++++++++--------
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 16 ++--
> drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 3 +-
> 5 files changed, 60 insertions(+), 41 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
> index 02c2479a8840..b59040a8771f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
> @@ -38,7 +38,8 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
> stime = ktime_get();
> for (i = 0; i < n; i++) {
> struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
> - r = amdgpu_copy_buffer(ring, saddr, daddr, size, NULL, &fence,
> + r = amdgpu_copy_buffer(ring, &adev->mman.default_entity.base,
> + saddr, daddr, size, NULL, &fence,
> false, 0);
> if (r)
> goto exit_do_move;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index e08f58de4b17..c06c132a753c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -1321,8 +1321,8 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
> if (r)
> goto out;
>
> - r = amdgpu_fill_buffer(abo, 0, &bo->base._resv, &fence, true,
> - AMDGPU_KERNEL_JOB_ID_CLEAR_ON_RELEASE);
> + r = amdgpu_fill_buffer(&adev->mman.clear_entity, abo, 0, &bo->base._resv,
> + &fence, AMDGPU_KERNEL_JOB_ID_CLEAR_ON_RELEASE);
> if (WARN_ON(r))
> goto out;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 42d448cd6a6d..c8d59ca2b3bd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -164,6 +164,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
>
> /**
> * amdgpu_ttm_map_buffer - Map memory into the GART windows
> + * @entity: entity to run the window setup job
> * @bo: buffer object to map
> * @mem: memory object to map
> * @mm_cur: range to map
> @@ -176,7 +177,8 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
> * Setup one of the GART windows to access a specific piece of memory or return
> * the physical address for local memory.
> */
> -static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
> +static int amdgpu_ttm_map_buffer(struct drm_sched_entity *entity,
> + struct ttm_buffer_object *bo,
> struct ttm_resource *mem,
> struct amdgpu_res_cursor *mm_cur,
> unsigned int window, struct amdgpu_ring *ring,
> @@ -224,7 +226,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
> num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
> num_bytes = num_pages * 8 * AMDGPU_GPU_PAGES_IN_CPU_PAGE;
>
> - r = amdgpu_job_alloc_with_ib(adev, &adev->mman.default_entity.base,
> + r = amdgpu_job_alloc_with_ib(adev, entity,
> AMDGPU_FENCE_OWNER_UNDEFINED,
> num_dw * 4 + num_bytes,
> AMDGPU_IB_POOL_DELAYED, &job,
> @@ -274,6 +276,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
> /**
> * amdgpu_ttm_copy_mem_to_mem - Helper function for copy
> * @adev: amdgpu device
> + * @entity: entity to run the jobs
> * @src: buffer/address where to read from
> * @dst: buffer/address where to write to
> * @size: number of bytes to copy
> @@ -288,6 +291,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
> */
> __attribute__((nonnull))
> static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
> + struct drm_sched_entity *entity,
> const struct amdgpu_copy_mem *src,
> const struct amdgpu_copy_mem *dst,
> uint64_t size, bool tmz,
> @@ -320,12 +324,14 @@ static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
> cur_size = min3(src_mm.size, dst_mm.size, 256ULL << 20);
>
> /* Map src to window 0 and dst to window 1. */
> - r = amdgpu_ttm_map_buffer(src->bo, src->mem, &src_mm,
> + r = amdgpu_ttm_map_buffer(entity,
> + src->bo, src->mem, &src_mm,
> 0, ring, tmz, &cur_size, &from);
> if (r)
> goto error;
>
> - r = amdgpu_ttm_map_buffer(dst->bo, dst->mem, &dst_mm,
> + r = amdgpu_ttm_map_buffer(entity,
> + dst->bo, dst->mem, &dst_mm,
> 1, ring, tmz, &cur_size, &to);
> if (r)
> goto error;
> @@ -353,7 +359,7 @@ static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
> write_compress_disable));
> }
>
> - r = amdgpu_copy_buffer(ring, from, to, cur_size, resv,
> + r = amdgpu_copy_buffer(ring, entity, from, to, cur_size, resv,
> &next, true, copy_flags);
> if (r)
> goto error;
> @@ -394,7 +400,9 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
> src.offset = 0;
> dst.offset = 0;
>
> - r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst,
> + r = amdgpu_ttm_copy_mem_to_mem(adev,
> + &adev->mman.move_entity.base,
> + &src, &dst,
> new_mem->size,
> amdgpu_bo_encrypted(abo),
> bo->base.resv, &fence);
> @@ -406,8 +414,9 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
> (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
> struct dma_fence *wipe_fence = NULL;
>
> - r = amdgpu_fill_buffer(abo, 0, NULL, &wipe_fence,
> - false, AMDGPU_KERNEL_JOB_ID_MOVE_BLIT);
> + r = amdgpu_fill_buffer(&adev->mman.move_entity,
> + abo, 0, NULL, &wipe_fence,
> + AMDGPU_KERNEL_JOB_ID_MOVE_BLIT);
> if (r) {
> goto error;
> } else if (wipe_fence) {
> @@ -2223,16 +2232,15 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
> }
>
> static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
> + struct drm_sched_entity *entity,
> unsigned int num_dw,
> struct dma_resv *resv,
> bool vm_needs_flush,
> struct amdgpu_job **job,
> - bool delayed, u64 k_job_id)
> + u64 k_job_id)
> {
> enum amdgpu_ib_pool_type pool = AMDGPU_IB_POOL_DELAYED;
> int r;
> - struct drm_sched_entity *entity = delayed ? &adev->mman.clear_entity.base :
> - &adev->mman.move_entity.base;
> r = amdgpu_job_alloc_with_ib(adev, entity,
> AMDGPU_FENCE_OWNER_UNDEFINED,
> num_dw * 4, pool, job, k_job_id);
> @@ -2252,7 +2260,9 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
> DMA_RESV_USAGE_BOOKKEEP);
> }
>
> -int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
> +int amdgpu_copy_buffer(struct amdgpu_ring *ring,
> + struct drm_sched_entity *entity,
> + uint64_t src_offset,
> uint64_t dst_offset, uint32_t byte_count,
> struct dma_resv *resv,
> struct dma_fence **fence,
> @@ -2274,8 +2284,8 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
> max_bytes = adev->mman.buffer_funcs->copy_max_bytes;
> num_loops = DIV_ROUND_UP(byte_count, max_bytes);
> num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8);
> - r = amdgpu_ttm_prepare_job(adev, num_dw,
> - resv, vm_needs_flush, &job, false,
> + r = amdgpu_ttm_prepare_job(adev, entity, num_dw,
> + resv, vm_needs_flush, &job,
> AMDGPU_KERNEL_JOB_ID_TTM_COPY_BUFFER);
> if (r)
> return r;
> @@ -2304,11 +2314,13 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
> return r;
> }
>
> -static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
> +static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring,
> + struct drm_sched_entity *entity,
> + uint32_t src_data,
> uint64_t dst_addr, uint32_t byte_count,
> struct dma_resv *resv,
> struct dma_fence **fence,
> - bool vm_needs_flush, bool delayed,
> + bool vm_needs_flush,
> u64 k_job_id)
> {
> struct amdgpu_device *adev = ring->adev;
> @@ -2321,8 +2333,8 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
> max_bytes = adev->mman.buffer_funcs->fill_max_bytes;
> num_loops = DIV_ROUND_UP_ULL(byte_count, max_bytes);
> num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->fill_num_dw, 8);
> - r = amdgpu_ttm_prepare_job(adev, num_dw, resv, vm_needs_flush,
> - &job, delayed, k_job_id);
> + r = amdgpu_ttm_prepare_job(adev, entity, num_dw, resv,
> + vm_needs_flush, &job, k_job_id);
> if (r)
> return r;
>
> @@ -2386,13 +2398,14 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
> /* Never clear more than 256MiB at once to avoid timeouts */
> size = min(cursor.size, 256ULL << 20);
>
> - r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &cursor,
> + r = amdgpu_ttm_map_buffer(&adev->mman.clear_entity.base,
> + &bo->tbo, bo->tbo.resource, &cursor,
> 1, ring, false, &size, &addr);
> if (r)
> goto err;
>
> - r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv,
> - &next, true, true,
> + r = amdgpu_ttm_fill_mem(ring, &adev->mman.clear_entity.base, 0, addr, size, resv,
> + &next, true,
> AMDGPU_KERNEL_JOB_ID_TTM_CLEAR_BUFFER);
> if (r)
> goto err;
> @@ -2408,12 +2421,12 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
> return r;
> }
>
> -int amdgpu_fill_buffer(struct amdgpu_bo *bo,
> - uint32_t src_data,
> - struct dma_resv *resv,
> - struct dma_fence **f,
> - bool delayed,
> - u64 k_job_id)
> +int amdgpu_fill_buffer(struct amdgpu_ttm_entity *entity,
> + struct amdgpu_bo *bo,
> + uint32_t src_data,
> + struct dma_resv *resv,
> + struct dma_fence **f,
> + u64 k_job_id)
> {
> struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
> struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
> @@ -2437,13 +2450,15 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
> /* Never fill more than 256MiB at once to avoid timeouts */
> cur_size = min(dst.size, 256ULL << 20);
>
> - r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &dst,
> + r = amdgpu_ttm_map_buffer(&entity->base,
> + &bo->tbo, bo->tbo.resource, &dst,
> 1, ring, false, &cur_size, &to);
> if (r)
> goto error;
>
> - r = amdgpu_ttm_fill_mem(ring, src_data, to, cur_size, resv,
> - &next, true, delayed, k_job_id);
> + r = amdgpu_ttm_fill_mem(ring, &entity->base,
> + src_data, to, cur_size, resv,
> + &next, true, k_job_id);
> if (r)
> goto error;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> index d2295d6c2b67..e1655f86a016 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> @@ -167,7 +167,9 @@ int amdgpu_ttm_init(struct amdgpu_device *adev);
> void amdgpu_ttm_fini(struct amdgpu_device *adev);
> void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev,
> bool enable);
> -int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
> +int amdgpu_copy_buffer(struct amdgpu_ring *ring,
> + struct drm_sched_entity *entity,
> + uint64_t src_offset,
> uint64_t dst_offset, uint32_t byte_count,
> struct dma_resv *resv,
> struct dma_fence **fence,
> @@ -175,12 +177,12 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
> int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
> struct dma_resv *resv,
> struct dma_fence **fence);
> -int amdgpu_fill_buffer(struct amdgpu_bo *bo,
> - uint32_t src_data,
> - struct dma_resv *resv,
> - struct dma_fence **fence,
> - bool delayed,
> - u64 k_job_id);
> +int amdgpu_fill_buffer(struct amdgpu_ttm_entity *entity,
> + struct amdgpu_bo *bo,
> + uint32_t src_data,
> + struct dma_resv *resv,
> + struct dma_fence **f,
> + u64 k_job_id);
>
> int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
> void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> index d74ff6e90590..09756132fa1b 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> @@ -157,7 +157,8 @@ svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys,
> goto out_unlock;
> }
>
> - r = amdgpu_copy_buffer(ring, gart_s, gart_d, size * PAGE_SIZE,
> + r = amdgpu_copy_buffer(ring, &entity->base,
> + gart_s, gart_d, size * PAGE_SIZE,
> NULL, &next, true, 0);
> if (r) {
> dev_err(adev->dev, "fail %d to copy memory\n", r);
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH v2 09/20] drm/amdgpu: pass optional dependency to amdgpu_fill_buffer
2025-11-13 16:05 [PATCH v2 00/20] drm/amdgpu: use all SDMA instances for TTM clears and moves Pierre-Eric Pelloux-Prayer
` (2 preceding siblings ...)
2025-11-13 16:05 ` [PATCH v2 05/20] drm/amdgpu: pass the entity to use to ttm functions Pierre-Eric Pelloux-Prayer
@ 2025-11-13 16:05 ` Pierre-Eric Pelloux-Prayer
2025-11-17 8:43 ` Christian König
2025-11-13 16:05 ` [PATCH v2 10/20] drm/admgpu: handle resv dependencies in amdgpu_ttm_map_buffer Pierre-Eric Pelloux-Prayer
` (2 subsequent siblings)
6 siblings, 1 reply; 20+ messages in thread
From: Pierre-Eric Pelloux-Prayer @ 2025-11-13 16:05 UTC (permalink / raw)
To: Alex Deucher, Christian König, David Airlie, Simona Vetter,
Sumit Semwal
Cc: Pierre-Eric Pelloux-Prayer, amd-gfx, dri-devel, linux-kernel,
linux-media, linaro-mm-sig
In case the fill job depends on a previous fence, the caller can
now pass it to make sure the ordering of the jobs is correct.
Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 22 ++++++++++++++++------
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 1 +
3 files changed, 18 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index e7b2cae031b3..be3532134e46 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -1322,7 +1322,7 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
goto out;
r = amdgpu_fill_buffer(&adev->mman.clear_entities[0], abo, 0, &bo->base._resv,
- &fence, AMDGPU_KERNEL_JOB_ID_CLEAR_ON_RELEASE);
+ &fence, NULL, AMDGPU_KERNEL_JOB_ID_CLEAR_ON_RELEASE);
if (WARN_ON(r))
goto out;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index e1f0567fd2d5..b13f0993dbf1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -173,6 +173,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
* @tmz: if we should setup a TMZ enabled mapping
* @size: in number of bytes to map, out number of bytes mapped
* @addr: resulting address inside the MC address space
+ * @dep: optional dependency
*
* Setup one of the GART windows to access a specific piece of memory or return
* the physical address for local memory.
@@ -182,7 +183,8 @@ static int amdgpu_ttm_map_buffer(struct drm_sched_entity *entity,
struct ttm_resource *mem,
struct amdgpu_res_cursor *mm_cur,
unsigned int window, struct amdgpu_ring *ring,
- bool tmz, uint64_t *size, uint64_t *addr)
+ bool tmz, uint64_t *size, uint64_t *addr,
+ struct dma_fence *dep)
{
struct amdgpu_device *adev = ring->adev;
unsigned int offset, num_pages, num_dw, num_bytes;
@@ -234,6 +236,9 @@ static int amdgpu_ttm_map_buffer(struct drm_sched_entity *entity,
if (r)
return r;
+ if (dep)
+ drm_sched_job_add_dependency(&job->base, dma_fence_get(dep));
+
src_addr = num_dw * 4;
src_addr += job->ibs[0].gpu_addr;
@@ -326,13 +331,15 @@ static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
/* Map src to window 0 and dst to window 1. */
r = amdgpu_ttm_map_buffer(&entity->base,
src->bo, src->mem, &src_mm,
- entity->gart_window_id0, ring, tmz, &cur_size, &from);
+ entity->gart_window_id0, ring, tmz, &cur_size, &from,
+ NULL);
if (r)
goto error;
r = amdgpu_ttm_map_buffer(&entity->base,
dst->bo, dst->mem, &dst_mm,
- entity->gart_window_id1, ring, tmz, &cur_size, &to);
+ entity->gart_window_id1, ring, tmz, &cur_size, &to,
+ NULL);
if (r)
goto error;
@@ -415,7 +422,7 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
struct dma_fence *wipe_fence = NULL;
r = amdgpu_fill_buffer(&adev->mman.move_entities[0],
- abo, 0, NULL, &wipe_fence,
+ abo, 0, NULL, &wipe_fence, fence,
AMDGPU_KERNEL_JOB_ID_MOVE_BLIT);
if (r) {
goto error;
@@ -2443,7 +2450,8 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
r = amdgpu_ttm_map_buffer(&entity->base,
&bo->tbo, bo->tbo.resource, &cursor,
- entity->gart_window_id1, ring, false, &size, &addr);
+ entity->gart_window_id1, ring, false, &size, &addr,
+ NULL);
if (r)
goto err;
@@ -2469,6 +2477,7 @@ int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity,
uint32_t src_data,
struct dma_resv *resv,
struct dma_fence **f,
+ struct dma_fence *dependency,
u64 k_job_id)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
@@ -2496,7 +2505,8 @@ int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity,
r = amdgpu_ttm_map_buffer(&entity->base,
&bo->tbo, bo->tbo.resource, &dst,
entity->gart_window_id1, ring, false,
- &cur_size, &to);
+ &cur_size, &to,
+ dependency);
if (r)
goto error;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 9d4891e86675..e8f8165f5bcf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -186,6 +186,7 @@ int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity,
uint32_t src_data,
struct dma_resv *resv,
struct dma_fence **f,
+ struct dma_fence *dependency,
u64 k_job_id);
int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
--
2.43.0
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: [PATCH v2 09/20] drm/amdgpu: pass optional dependency to amdgpu_fill_buffer
2025-11-13 16:05 ` [PATCH v2 09/20] drm/amdgpu: pass optional dependency to amdgpu_fill_buffer Pierre-Eric Pelloux-Prayer
@ 2025-11-17 8:43 ` Christian König
0 siblings, 0 replies; 20+ messages in thread
From: Christian König @ 2025-11-17 8:43 UTC (permalink / raw)
To: Pierre-Eric Pelloux-Prayer, Alex Deucher, David Airlie,
Simona Vetter, Sumit Semwal
Cc: amd-gfx, dri-devel, linux-kernel, linux-media, linaro-mm-sig
On 11/13/25 17:05, Pierre-Eric Pelloux-Prayer wrote:
> In case the fill job depends on a previous fence, the caller can
> now pass it to make sure the ordering of the jobs is correct.
I don't think you need that patch any more.
>
> Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 22 ++++++++++++++++------
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 1 +
> 3 files changed, 18 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index e7b2cae031b3..be3532134e46 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -1322,7 +1322,7 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
> goto out;
>
> r = amdgpu_fill_buffer(&adev->mman.clear_entities[0], abo, 0, &bo->base._resv,
> - &fence, AMDGPU_KERNEL_JOB_ID_CLEAR_ON_RELEASE);
> + &fence, NULL, AMDGPU_KERNEL_JOB_ID_CLEAR_ON_RELEASE);
> if (WARN_ON(r))
> goto out;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index e1f0567fd2d5..b13f0993dbf1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -173,6 +173,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
> * @tmz: if we should setup a TMZ enabled mapping
> * @size: in number of bytes to map, out number of bytes mapped
> * @addr: resulting address inside the MC address space
> + * @dep: optional dependency
> *
> * Setup one of the GART windows to access a specific piece of memory or return
> * the physical address for local memory.
> @@ -182,7 +183,8 @@ static int amdgpu_ttm_map_buffer(struct drm_sched_entity *entity,
> struct ttm_resource *mem,
> struct amdgpu_res_cursor *mm_cur,
> unsigned int window, struct amdgpu_ring *ring,
> - bool tmz, uint64_t *size, uint64_t *addr)
> + bool tmz, uint64_t *size, uint64_t *addr,
> + struct dma_fence *dep)
> {
> struct amdgpu_device *adev = ring->adev;
> unsigned int offset, num_pages, num_dw, num_bytes;
> @@ -234,6 +236,9 @@ static int amdgpu_ttm_map_buffer(struct drm_sched_entity *entity,
> if (r)
> return r;
>
> + if (dep)
> + drm_sched_job_add_dependency(&job->base, dma_fence_get(dep));
> +
> src_addr = num_dw * 4;
> src_addr += job->ibs[0].gpu_addr;
>
> @@ -326,13 +331,15 @@ static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
> /* Map src to window 0 and dst to window 1. */
> r = amdgpu_ttm_map_buffer(&entity->base,
> src->bo, src->mem, &src_mm,
> - entity->gart_window_id0, ring, tmz, &cur_size, &from);
> + entity->gart_window_id0, ring, tmz, &cur_size, &from,
> + NULL);
> if (r)
> goto error;
>
> r = amdgpu_ttm_map_buffer(&entity->base,
> dst->bo, dst->mem, &dst_mm,
> - entity->gart_window_id1, ring, tmz, &cur_size, &to);
> + entity->gart_window_id1, ring, tmz, &cur_size, &to,
> + NULL);
> if (r)
> goto error;
>
> @@ -415,7 +422,7 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
> struct dma_fence *wipe_fence = NULL;
>
> r = amdgpu_fill_buffer(&adev->mman.move_entities[0],
> - abo, 0, NULL, &wipe_fence,
> + abo, 0, NULL, &wipe_fence, fence,
> AMDGPU_KERNEL_JOB_ID_MOVE_BLIT);
> if (r) {
> goto error;
> @@ -2443,7 +2450,8 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
>
> r = amdgpu_ttm_map_buffer(&entity->base,
> &bo->tbo, bo->tbo.resource, &cursor,
> - entity->gart_window_id1, ring, false, &size, &addr);
> + entity->gart_window_id1, ring, false, &size, &addr,
> + NULL);
> if (r)
> goto err;
>
> @@ -2469,6 +2477,7 @@ int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity,
> uint32_t src_data,
> struct dma_resv *resv,
> struct dma_fence **f,
> + struct dma_fence *dependency,
> u64 k_job_id)
> {
> struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
> @@ -2496,7 +2505,8 @@ int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity,
> r = amdgpu_ttm_map_buffer(&entity->base,
> &bo->tbo, bo->tbo.resource, &dst,
> entity->gart_window_id1, ring, false,
> - &cur_size, &to);
> + &cur_size, &to,
> + dependency);
> if (r)
> goto error;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> index 9d4891e86675..e8f8165f5bcf 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> @@ -186,6 +186,7 @@ int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity,
> uint32_t src_data,
> struct dma_resv *resv,
> struct dma_fence **f,
> + struct dma_fence *dependency,
> u64 k_job_id);
>
> int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH v2 10/20] drm/amdgpu: handle resv dependencies in amdgpu_ttm_map_buffer
2025-11-13 16:05 [PATCH v2 00/20] drm/amdgpu: use all SDMA instances for TTM clears and moves Pierre-Eric Pelloux-Prayer
` (3 preceding siblings ...)
2025-11-13 16:05 ` [PATCH v2 09/20] drm/amdgpu: pass optional dependency to amdgpu_fill_buffer Pierre-Eric Pelloux-Prayer
@ 2025-11-13 16:05 ` Pierre-Eric Pelloux-Prayer
2025-11-17 8:44 ` Christian König
2025-11-13 16:05 ` [PATCH v2 17/20] drm/amdgpu: get rid of amdgpu_ttm_clear_buffer Pierre-Eric Pelloux-Prayer
2025-11-13 16:05 ` [PATCH v2 18/20] drm/amdgpu: rename amdgpu_fill_buffer as amdgpu_ttm_clear_buffer Pierre-Eric Pelloux-Prayer
6 siblings, 1 reply; 20+ messages in thread
From: Pierre-Eric Pelloux-Prayer @ 2025-11-13 16:05 UTC (permalink / raw)
To: Alex Deucher, Christian König, David Airlie, Simona Vetter,
Sumit Semwal
Cc: Pierre-Eric Pelloux-Prayer, amd-gfx, dri-devel, linux-kernel,
linux-media, linaro-mm-sig
If a resv object is passed, its fences are added as dependencies of
the job submitted by amdgpu_ttm_map_buffer.
This will be used by amdgpu_bo_release_notify through
amdgpu_fill_buffer.
Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 16 +++++++++++-----
1 file changed, 11 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index b13f0993dbf1..411997db70eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -184,7 +184,8 @@ static int amdgpu_ttm_map_buffer(struct drm_sched_entity *entity,
struct amdgpu_res_cursor *mm_cur,
unsigned int window, struct amdgpu_ring *ring,
bool tmz, uint64_t *size, uint64_t *addr,
- struct dma_fence *dep)
+ struct dma_fence *dep,
+ struct dma_resv *resv)
{
struct amdgpu_device *adev = ring->adev;
unsigned int offset, num_pages, num_dw, num_bytes;
@@ -239,6 +240,10 @@ static int amdgpu_ttm_map_buffer(struct drm_sched_entity *entity,
if (dep)
drm_sched_job_add_dependency(&job->base, dma_fence_get(dep));
+ if (resv)
+ drm_sched_job_add_resv_dependencies(&job->base, resv,
+ DMA_RESV_USAGE_BOOKKEEP);
+
src_addr = num_dw * 4;
src_addr += job->ibs[0].gpu_addr;
@@ -332,14 +337,14 @@ static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
r = amdgpu_ttm_map_buffer(&entity->base,
src->bo, src->mem, &src_mm,
entity->gart_window_id0, ring, tmz, &cur_size, &from,
- NULL);
+ NULL, NULL);
if (r)
goto error;
r = amdgpu_ttm_map_buffer(&entity->base,
dst->bo, dst->mem, &dst_mm,
entity->gart_window_id1, ring, tmz, &cur_size, &to,
- NULL);
+ NULL, NULL);
if (r)
goto error;
@@ -2451,7 +2456,7 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
r = amdgpu_ttm_map_buffer(&entity->base,
&bo->tbo, bo->tbo.resource, &cursor,
entity->gart_window_id1, ring, false, &size, &addr,
- NULL);
+ NULL, NULL);
if (r)
goto err;
@@ -2506,7 +2511,8 @@ int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity,
&bo->tbo, bo->tbo.resource, &dst,
entity->gart_window_id1, ring, false,
&cur_size, &to,
- dependency);
+ dependency,
+ resv);
if (r)
goto error;
--
2.43.0
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: [PATCH v2 10/20] drm/amdgpu: handle resv dependencies in amdgpu_ttm_map_buffer
2025-11-13 16:05 ` [PATCH v2 10/20] drm/amdgpu: handle resv dependencies in amdgpu_ttm_map_buffer Pierre-Eric Pelloux-Prayer
@ 2025-11-17 8:44 ` Christian König
2025-11-19 8:28 ` Pierre-Eric Pelloux-Prayer
0 siblings, 1 reply; 20+ messages in thread
From: Christian König @ 2025-11-17 8:44 UTC (permalink / raw)
To: Pierre-Eric Pelloux-Prayer, Alex Deucher, David Airlie,
Simona Vetter, Sumit Semwal
Cc: amd-gfx, dri-devel, linux-kernel, linux-media, linaro-mm-sig
On 11/13/25 17:05, Pierre-Eric Pelloux-Prayer wrote:
> If a resv object is passed, its fences are treated as a dependency
> for the amdgpu_ttm_map_buffer operation.
>
> This will be used by amdgpu_bo_release_notify through
> amdgpu_fill_buffer.
Why should updating the GART window depend on fences in a resv object?
Regards,
Christian.
>
> Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 16 +++++++++++-----
> 1 file changed, 11 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index b13f0993dbf1..411997db70eb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -184,7 +184,8 @@ static int amdgpu_ttm_map_buffer(struct drm_sched_entity *entity,
> struct amdgpu_res_cursor *mm_cur,
> unsigned int window, struct amdgpu_ring *ring,
> bool tmz, uint64_t *size, uint64_t *addr,
> - struct dma_fence *dep)
> + struct dma_fence *dep,
> + struct dma_resv *resv)
> {
> struct amdgpu_device *adev = ring->adev;
> unsigned int offset, num_pages, num_dw, num_bytes;
> @@ -239,6 +240,10 @@ static int amdgpu_ttm_map_buffer(struct drm_sched_entity *entity,
> if (dep)
> drm_sched_job_add_dependency(&job->base, dma_fence_get(dep));
>
> + if (resv)
> + drm_sched_job_add_resv_dependencies(&job->base, resv,
> + DMA_RESV_USAGE_BOOKKEEP);
> +
> src_addr = num_dw * 4;
> src_addr += job->ibs[0].gpu_addr;
>
> @@ -332,14 +337,14 @@ static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
> r = amdgpu_ttm_map_buffer(&entity->base,
> src->bo, src->mem, &src_mm,
> entity->gart_window_id0, ring, tmz, &cur_size, &from,
> - NULL);
> + NULL, NULL);
> if (r)
> goto error;
>
> r = amdgpu_ttm_map_buffer(&entity->base,
> dst->bo, dst->mem, &dst_mm,
> entity->gart_window_id1, ring, tmz, &cur_size, &to,
> - NULL);
> + NULL, NULL);
> if (r)
> goto error;
>
> @@ -2451,7 +2456,7 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
> r = amdgpu_ttm_map_buffer(&entity->base,
> &bo->tbo, bo->tbo.resource, &cursor,
> entity->gart_window_id1, ring, false, &size, &addr,
> - NULL);
> + NULL, NULL);
> if (r)
> goto err;
>
> @@ -2506,7 +2511,8 @@ int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity,
> &bo->tbo, bo->tbo.resource, &dst,
> entity->gart_window_id1, ring, false,
> &cur_size, &to,
> - dependency);
> + dependency,
> + resv);
> if (r)
> goto error;
>
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH v2 10/20] drm/amdgpu: handle resv dependencies in amdgpu_ttm_map_buffer
2025-11-17 8:44 ` Christian König
@ 2025-11-19 8:28 ` Pierre-Eric Pelloux-Prayer
0 siblings, 0 replies; 20+ messages in thread
From: Pierre-Eric Pelloux-Prayer @ 2025-11-19 8:28 UTC (permalink / raw)
To: Christian König, Pierre-Eric Pelloux-Prayer, Alex Deucher,
David Airlie, Simona Vetter, Sumit Semwal
Cc: amd-gfx, dri-devel, linux-kernel, linux-media, linaro-mm-sig
Le 17/11/2025 à 09:44, Christian König a écrit :
> On 11/13/25 17:05, Pierre-Eric Pelloux-Prayer wrote:
>> If a resv object is passed, its fences are treated as a dependency
>> for the amdgpu_ttm_map_buffer operation.
>>
>> This will be used by amdgpu_bo_release_notify through
>> amdgpu_fill_buffer.
>
> Why should updating the GART window depend on fences in a resv object?
>
You're right, this is not needed. I'll drop the patch.
Pierre-Eric
> Regards,
> Christian.
>
>>
>> Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 16 +++++++++++-----
>> 1 file changed, 11 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> index b13f0993dbf1..411997db70eb 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> @@ -184,7 +184,8 @@ static int amdgpu_ttm_map_buffer(struct drm_sched_entity *entity,
>> struct amdgpu_res_cursor *mm_cur,
>> unsigned int window, struct amdgpu_ring *ring,
>> bool tmz, uint64_t *size, uint64_t *addr,
>> - struct dma_fence *dep)
>> + struct dma_fence *dep,
>> + struct dma_resv *resv)
>> {
>> struct amdgpu_device *adev = ring->adev;
>> unsigned int offset, num_pages, num_dw, num_bytes;
>> @@ -239,6 +240,10 @@ static int amdgpu_ttm_map_buffer(struct drm_sched_entity *entity,
>> if (dep)
>> drm_sched_job_add_dependency(&job->base, dma_fence_get(dep));
>>
>> + if (resv)
>> + drm_sched_job_add_resv_dependencies(&job->base, resv,
>> + DMA_RESV_USAGE_BOOKKEEP);
>> +
>> src_addr = num_dw * 4;
>> src_addr += job->ibs[0].gpu_addr;
>>
>> @@ -332,14 +337,14 @@ static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
>> r = amdgpu_ttm_map_buffer(&entity->base,
>> src->bo, src->mem, &src_mm,
>> entity->gart_window_id0, ring, tmz, &cur_size, &from,
>> - NULL);
>> + NULL, NULL);
>> if (r)
>> goto error;
>>
>> r = amdgpu_ttm_map_buffer(&entity->base,
>> dst->bo, dst->mem, &dst_mm,
>> entity->gart_window_id1, ring, tmz, &cur_size, &to,
>> - NULL);
>> + NULL, NULL);
>> if (r)
>> goto error;
>>
>> @@ -2451,7 +2456,7 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
>> r = amdgpu_ttm_map_buffer(&entity->base,
>> &bo->tbo, bo->tbo.resource, &cursor,
>> entity->gart_window_id1, ring, false, &size, &addr,
>> - NULL);
>> + NULL, NULL);
>> if (r)
>> goto err;
>>
>> @@ -2506,7 +2511,8 @@ int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity,
>> &bo->tbo, bo->tbo.resource, &dst,
>> entity->gart_window_id1, ring, false,
>> &cur_size, &to,
>> - dependency);
>> + dependency,
>> + resv);
>> if (r)
>> goto error;
>>
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH v2 17/20] drm/amdgpu: get rid of amdgpu_ttm_clear_buffer
2025-11-13 16:05 [PATCH v2 00/20] drm/amdgpu: use all SDMA instances for TTM clears and moves Pierre-Eric Pelloux-Prayer
` (4 preceding siblings ...)
2025-11-13 16:05 ` [PATCH v2 10/20] drm/amdgpu: handle resv dependencies in amdgpu_ttm_map_buffer Pierre-Eric Pelloux-Prayer
@ 2025-11-13 16:05 ` Pierre-Eric Pelloux-Prayer
2025-11-13 16:05 ` [PATCH v2 18/20] drm/amdgpu: rename amdgpu_fill_buffer as amdgpu_ttm_clear_buffer Pierre-Eric Pelloux-Prayer
6 siblings, 0 replies; 20+ messages in thread
From: Pierre-Eric Pelloux-Prayer @ 2025-11-13 16:05 UTC (permalink / raw)
To: Alex Deucher, Christian König, David Airlie, Simona Vetter,
Sumit Semwal
Cc: Pierre-Eric Pelloux-Prayer, amd-gfx, dri-devel, linux-kernel,
linux-media, linaro-mm-sig
It does the same thing as amdgpu_fill_buffer() called with src_data=0, so drop it.
The only caveat is that the return value of amdgpu_res_cleared() is only valid
right after allocation, so skipping already-cleared regions is made opt-in.
---
v2: introduce new "bool consider_clear_status" arg
---
Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 15 ++--
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 94 +++++-----------------
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 6 +-
3 files changed, 32 insertions(+), 83 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 33b397107778..4490b19752b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -725,13 +725,16 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
bo->tbo.resource->mem_type == TTM_PL_VRAM) {
struct dma_fence *fence;
- r = amdgpu_ttm_clear_buffer(bo, bo->tbo.base.resv, &fence);
+ r = amdgpu_fill_buffer(NULL, bo, 0, NULL, &fence, NULL,
+ true, AMDGPU_KERNEL_JOB_ID_TTM_CLEAR_BUFFER);
if (unlikely(r))
goto fail_unreserve;
- dma_resv_add_fence(bo->tbo.base.resv, fence,
- DMA_RESV_USAGE_KERNEL);
- dma_fence_put(fence);
+ if (fence) {
+ dma_resv_add_fence(bo->tbo.base.resv, fence,
+ DMA_RESV_USAGE_KERNEL);
+ dma_fence_put(fence);
+ }
}
if (!bp->resv)
amdgpu_bo_unreserve(bo);
@@ -1321,8 +1324,8 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
if (r)
goto out;
- r = amdgpu_fill_buffer(NULL, abo, 0, &bo->base._resv,
- &fence, NULL, AMDGPU_KERNEL_JOB_ID_CLEAR_ON_RELEASE);
+ r = amdgpu_fill_buffer(NULL, abo, 0, &bo->base._resv, &fence, NULL,
+ false, AMDGPU_KERNEL_JOB_ID_CLEAR_ON_RELEASE);
if (WARN_ON(r))
goto out;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 94d0ff34593f..df05768c3817 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -435,7 +435,7 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
r = amdgpu_fill_buffer(entity,
abo, 0, NULL, &wipe_fence, fence,
- AMDGPU_KERNEL_JOB_ID_MOVE_BLIT);
+ false, AMDGPU_KERNEL_JOB_ID_MOVE_BLIT);
if (r) {
goto error;
} else if (wipe_fence) {
@@ -2418,82 +2418,27 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring,
}
/**
- * amdgpu_ttm_clear_buffer - clear memory buffers
- * @bo: amdgpu buffer object
- * @resv: reservation object
- * @fence: dma_fence associated with the operation
+ * amdgpu_fill_buffer - fill a buffer with a given value
+ * @entity: optional entity to use. If NULL, the clearing entities will be
+ * used to load-balance the partial clears
+ * @bo: the bo to fill
+ * @src_data: the value to set
+ * @resv: fences contained in this reservation will be used as dependencies.
+ * @out_fence: the fence from the last clear will be stored here. It might be
+ * NULL if no job was run.
+ * @dependency: optional input dependency fence.
+ * @consider_clear_status: if true, regions reported as cleared by
+ * amdgpu_res_cleared() are skipped.
+ * @k_job_id: trace id
*
- * Clear the memory buffer resource.
- *
- * Returns:
- * 0 for success or a negative error code on failure.
*/
-int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
- struct dma_resv *resv,
- struct dma_fence **fence)
-{
- struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- struct amdgpu_ring *ring;
- struct amdgpu_ttm_buffer_entity *entity;
- struct amdgpu_res_cursor cursor;
- u64 addr;
- int r = 0;
-
- if (!adev->mman.buffer_funcs_enabled)
- return -EINVAL;
-
- if (!fence)
- return -EINVAL;
-
- ring = container_of(adev->mman.buffer_funcs_scheds[0], struct amdgpu_ring, sched);
- entity = &adev->mman.clear_entities[0];
- *fence = dma_fence_get_stub();
-
- amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
-
- mutex_lock(&entity->gart_window_lock);
- while (cursor.remaining) {
- struct dma_fence *next = NULL;
- u64 size;
-
- if (amdgpu_res_cleared(&cursor)) {
- amdgpu_res_next(&cursor, cursor.size);
- continue;
- }
-
- /* Never clear more than 256MiB at once to avoid timeouts */
- size = min(cursor.size, 256ULL << 20);
-
- r = amdgpu_ttm_map_buffer(&entity->base,
- &bo->tbo, bo->tbo.resource, &cursor,
- entity->gart_window_id1, ring, false, &size, &addr,
- NULL, NULL);
- if (r)
- goto err;
-
- r = amdgpu_ttm_fill_mem(ring, &entity->base, 0, addr, size, resv,
- &next, true,
- AMDGPU_KERNEL_JOB_ID_TTM_CLEAR_BUFFER);
- if (r)
- goto err;
-
- dma_fence_put(*fence);
- *fence = next;
-
- amdgpu_res_next(&cursor, size);
- }
-err:
- mutex_unlock(&entity->gart_window_lock);
-
- return r;
-}
-
int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity,
struct amdgpu_bo *bo,
uint32_t src_data,
struct dma_resv *resv,
- struct dma_fence **f,
+ struct dma_fence **out_fence,
struct dma_fence *dependency,
+ bool consider_clear_status,
u64 k_job_id)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
@@ -2523,6 +2468,11 @@ int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity,
struct dma_fence *next;
uint64_t cur_size, to;
+ if (consider_clear_status && amdgpu_res_cleared(&dst)) {
+ amdgpu_res_next(&dst, dst.size);
+ continue;
+ }
+
/* Never fill more than 256MiB at once to avoid timeouts */
cur_size = min(dst.size, 256ULL << 20);
@@ -2548,9 +2498,7 @@ int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity,
}
error:
mutex_unlock(&entity->gart_window_lock);
- if (f)
- *f = dma_fence_get(fence);
- dma_fence_put(fence);
+ *out_fence = fence;
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 63c3e2466708..e01c2173d79f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -181,15 +181,13 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
struct dma_resv *resv,
struct dma_fence **fence,
bool vm_needs_flush, uint32_t copy_flags);
-int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
- struct dma_resv *resv,
- struct dma_fence **fence);
int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity,
struct amdgpu_bo *bo,
uint32_t src_data,
struct dma_resv *resv,
- struct dma_fence **f,
+ struct dma_fence **out_fence,
struct dma_fence *dependency,
+ bool consider_clear_status,
u64 k_job_id);
int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
--
2.43.0
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH v2 18/20] drm/amdgpu: rename amdgpu_fill_buffer as amdgpu_ttm_clear_buffer
2025-11-13 16:05 [PATCH v2 00/20] drm/amdgpu: use all SDMA instances for TTM clears and moves Pierre-Eric Pelloux-Prayer
` (5 preceding siblings ...)
2025-11-13 16:05 ` [PATCH v2 17/20] drm/amdgpu: get rid of amdgpu_ttm_clear_buffer Pierre-Eric Pelloux-Prayer
@ 2025-11-13 16:05 ` Pierre-Eric Pelloux-Prayer
2025-11-17 9:56 ` Christian König
6 siblings, 1 reply; 20+ messages in thread
From: Pierre-Eric Pelloux-Prayer @ 2025-11-13 16:05 UTC (permalink / raw)
To: Alex Deucher, Christian König, David Airlie, Simona Vetter,
Sumit Semwal
Cc: Pierre-Eric Pelloux-Prayer, amd-gfx, dri-devel, linux-kernel,
linux-media, linaro-mm-sig
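Clearing a buffer (filling it with 0) is the only use case for this function,
so rename it accordingly and drop the now-useless src_data argument.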
---
v2: amdgpu_ttm_clear_buffer instead of amdgpu_clear_buffer
---
Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 8 +++----
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 26 ++++++++++------------
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 15 ++++++-------
3 files changed, 23 insertions(+), 26 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 4490b19752b8..4b9518097899 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -725,8 +725,8 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
bo->tbo.resource->mem_type == TTM_PL_VRAM) {
struct dma_fence *fence;
- r = amdgpu_fill_buffer(NULL, bo, 0, NULL, &fence, NULL,
- true, AMDGPU_KERNEL_JOB_ID_TTM_CLEAR_BUFFER);
+ r = amdgpu_ttm_clear_buffer(NULL, bo, NULL, &fence, NULL,
+ true, AMDGPU_KERNEL_JOB_ID_TTM_CLEAR_BUFFER);
if (unlikely(r))
goto fail_unreserve;
@@ -1324,8 +1324,8 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
if (r)
goto out;
- r = amdgpu_fill_buffer(NULL, abo, 0, &bo->base._resv, &fence, NULL,
- false, AMDGPU_KERNEL_JOB_ID_CLEAR_ON_RELEASE);
+ r = amdgpu_ttm_clear_buffer(NULL, abo, &bo->base._resv, &fence, NULL,
+ false, AMDGPU_KERNEL_JOB_ID_CLEAR_ON_RELEASE);
if (WARN_ON(r))
goto out;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index df05768c3817..0a55bc4ea91f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -433,9 +433,9 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
struct dma_fence *wipe_fence = NULL;
- r = amdgpu_fill_buffer(entity,
- abo, 0, NULL, &wipe_fence, fence,
- false, AMDGPU_KERNEL_JOB_ID_MOVE_BLIT);
+ r = amdgpu_ttm_clear_buffer(entity,
+ abo, NULL, &wipe_fence, fence,
+ false, AMDGPU_KERNEL_JOB_ID_MOVE_BLIT);
if (r) {
goto error;
} else if (wipe_fence) {
@@ -2418,11 +2418,10 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring,
}
/**
- * amdgpu_fill_buffer - fill a buffer with a given value
+ * amdgpu_ttm_clear_buffer - fill a buffer with 0
* @entity: optional entity to use. If NULL, the clearing entities will be
* used to load-balance the partial clears
* @bo: the bo to fill
- * @src_data: the value to set
* @resv: fences contained in this reservation will be used as dependencies.
* @out_fence: the fence from the last clear will be stored here. It might be
* NULL if no job was run.
@@ -2432,14 +2431,13 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring,
* @k_job_id: trace id
*
*/
-int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity,
- struct amdgpu_bo *bo,
- uint32_t src_data,
- struct dma_resv *resv,
- struct dma_fence **out_fence,
- struct dma_fence *dependency,
- bool consider_clear_status,
- u64 k_job_id)
+int amdgpu_ttm_clear_buffer(struct amdgpu_ttm_buffer_entity *entity,
+ struct amdgpu_bo *bo,
+ struct dma_resv *resv,
+ struct dma_fence **out_fence,
+ struct dma_fence *dependency,
+ bool consider_clear_status,
+ u64 k_job_id)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct dma_fence *fence = NULL;
@@ -2486,7 +2484,7 @@ int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity,
goto error;
r = amdgpu_ttm_fill_mem(ring, &entity->base,
- src_data, to, cur_size, resv,
+ 0, to, cur_size, resv,
&next, true, k_job_id);
if (r)
goto error;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index e01c2173d79f..585aee9a173b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -181,14 +181,13 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
struct dma_resv *resv,
struct dma_fence **fence,
bool vm_needs_flush, uint32_t copy_flags);
-int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity,
- struct amdgpu_bo *bo,
- uint32_t src_data,
- struct dma_resv *resv,
- struct dma_fence **out_fence,
- struct dma_fence *dependency,
- bool consider_clear_status,
- u64 k_job_id);
+int amdgpu_ttm_clear_buffer(struct amdgpu_ttm_buffer_entity *entity,
+ struct amdgpu_bo *bo,
+ struct dma_resv *resv,
+ struct dma_fence **out_fence,
+ struct dma_fence *dependency,
+ bool consider_clear_status,
+ u64 k_job_id);
int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
--
2.43.0
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: [PATCH v2 18/20] drm/amdgpu: rename amdgpu_fill_buffer as amdgpu_ttm_clear_buffer
2025-11-13 16:05 ` [PATCH v2 18/20] drm/amdgpu: rename amdgpu_fill_buffer as amdgpu_ttm_clear_buffer Pierre-Eric Pelloux-Prayer
@ 2025-11-17 9:56 ` Christian König
0 siblings, 0 replies; 20+ messages in thread
From: Christian König @ 2025-11-17 9:56 UTC (permalink / raw)
To: Pierre-Eric Pelloux-Prayer, Alex Deucher, David Airlie,
Simona Vetter, Sumit Semwal
Cc: amd-gfx, dri-devel, linux-kernel, linux-media, linaro-mm-sig
On 11/13/25 17:05, Pierre-Eric Pelloux-Prayer wrote:
> This is the only use case for this function.
>
> ---
> v2: amdgpu_ttm_clear_buffer instead of amdgpu_clear_buffer
> ---
>
> Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 8 +++----
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 26 ++++++++++------------
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 15 ++++++-------
> 3 files changed, 23 insertions(+), 26 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index 4490b19752b8..4b9518097899 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -725,8 +725,8 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
> bo->tbo.resource->mem_type == TTM_PL_VRAM) {
> struct dma_fence *fence;
>
> - r = amdgpu_fill_buffer(NULL, bo, 0, NULL, &fence, NULL,
> - true, AMDGPU_KERNEL_JOB_ID_TTM_CLEAR_BUFFER);
> + r = amdgpu_ttm_clear_buffer(NULL, bo, NULL, &fence, NULL,
> + true, AMDGPU_KERNEL_JOB_ID_TTM_CLEAR_BUFFER);
> if (unlikely(r))
> goto fail_unreserve;
>
> @@ -1324,8 +1324,8 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
> if (r)
> goto out;
>
> - r = amdgpu_fill_buffer(NULL, abo, 0, &bo->base._resv, &fence, NULL,
> - false, AMDGPU_KERNEL_JOB_ID_CLEAR_ON_RELEASE);
> + r = amdgpu_ttm_clear_buffer(NULL, abo, &bo->base._resv, &fence, NULL,
> + false, AMDGPU_KERNEL_JOB_ID_CLEAR_ON_RELEASE);
> if (WARN_ON(r))
> goto out;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index df05768c3817..0a55bc4ea91f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -433,9 +433,9 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
> (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
> struct dma_fence *wipe_fence = NULL;
>
> - r = amdgpu_fill_buffer(entity,
> - abo, 0, NULL, &wipe_fence, fence,
> - false, AMDGPU_KERNEL_JOB_ID_MOVE_BLIT);
> + r = amdgpu_ttm_clear_buffer(entity,
> + abo, NULL, &wipe_fence, fence,
> + false, AMDGPU_KERNEL_JOB_ID_MOVE_BLIT);
> if (r) {
> goto error;
> } else if (wipe_fence) {
> @@ -2418,11 +2418,10 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring,
> }
>
> /**
> - * amdgpu_fill_buffer - fill a buffer with a given value
> + * amdgpu_ttm_clear_buffer - fill a buffer with 0
> * @entity: optional entity to use. If NULL, the clearing entities will be
> * used to load-balance the partial clears
> * @bo: the bo to fill
> - * @src_data: the value to set
> * @resv: fences contained in this reservation will be used as dependencies.
> * @out_fence: the fence from the last clear will be stored here. It might be
> * NULL if no job was run.
> @@ -2432,14 +2431,13 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring,
> * @k_job_id: trace id
> *
> */
> -int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity,
> - struct amdgpu_bo *bo,
> - uint32_t src_data,
> - struct dma_resv *resv,
> - struct dma_fence **out_fence,
> - struct dma_fence *dependency,
> - bool consider_clear_status,
> - u64 k_job_id)
> +int amdgpu_ttm_clear_buffer(struct amdgpu_ttm_buffer_entity *entity,
> + struct amdgpu_bo *bo,
> + struct dma_resv *resv,
> + struct dma_fence **out_fence,
> + struct dma_fence *dependency,
> + bool consider_clear_status,
> + u64 k_job_id)
> {
> struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
> struct dma_fence *fence = NULL;
> @@ -2486,7 +2484,7 @@ int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity,
> goto error;
>
> r = amdgpu_ttm_fill_mem(ring, &entity->base,
> - src_data, to, cur_size, resv,
> + 0, to, cur_size, resv,
> &next, true, k_job_id);
> if (r)
> goto error;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> index e01c2173d79f..585aee9a173b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> @@ -181,14 +181,13 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
> struct dma_resv *resv,
> struct dma_fence **fence,
> bool vm_needs_flush, uint32_t copy_flags);
> -int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity,
> - struct amdgpu_bo *bo,
> - uint32_t src_data,
> - struct dma_resv *resv,
> - struct dma_fence **out_fence,
> - struct dma_fence *dependency,
> - bool consider_clear_status,
> - u64 k_job_id);
> +int amdgpu_ttm_clear_buffer(struct amdgpu_ttm_buffer_entity *entity,
> + struct amdgpu_bo *bo,
> + struct dma_resv *resv,
> + struct dma_fence **out_fence,
> + struct dma_fence *dependency,
> + bool consider_clear_status,
> + u64 k_job_id);
>
> int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
> void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
^ permalink raw reply [flat|nested] 20+ messages in thread