[PATCH v2 06/20] drm/amdgpu: statically assign gart windows to ttm entities

AMD-GFX Archive on lore.kernel.org
 help / color / mirror / Atom feed

From: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
To: "Alex Deucher" <alexander.deucher@amd.com>,
	"Christian König" <christian.koenig@amd.com>,
	"David Airlie" <airlied@gmail.com>,
	"Simona Vetter" <simona@ffwll.ch>,
	"Felix Kuehling" <Felix.Kuehling@amd.com>
Cc: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>,
	<amd-gfx@lists.freedesktop.org>,
	<dri-devel@lists.freedesktop.org>, <linux-kernel@vger.kernel.org>
Subject: [PATCH v2 06/20] drm/amdgpu: statically assign gart windows to ttm entities
Date: Thu, 13 Nov 2025 17:05:47 +0100	[thread overview]
Message-ID: <20251113160632.5889-7-pierre-eric.pelloux-prayer@amd.com> (raw)
In-Reply-To: <20251113160632.5889-1-pierre-eric.pelloux-prayer@amd.com>

If multiple entities share the same window we must make sure
that jobs using them are executed sequentially.

This commit gives separate window id to each entity, so jobs
from multiple entities could execute in parallel if needed.
(for now they all use the first sdma engine, so it makes no
difference yet).

default_entity doesn't get any windows reserved since there is
no use for them.

Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c  |  9 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c  | 50 ++++++++++++++----------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h  |  9 +++--
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c |  8 ++--
 4 files changed, 46 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 8e2d41c9c271..2a444d02cf4b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -686,7 +686,8 @@ void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 	 * translation. Avoid this by doing the invalidation from the SDMA
 	 * itself at least for GART.
 	 */
-	mutex_lock(&adev->mman.gtt_window_lock);
+	mutex_lock(&adev->mman.clear_entity.gart_window_lock);
+	mutex_lock(&adev->mman.move_entity.gart_window_lock);
 	r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.default_entity.base,
 				     AMDGPU_FENCE_OWNER_UNDEFINED,
 				     16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
@@ -699,7 +700,8 @@ void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 	job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
 	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
 	fence = amdgpu_job_submit(job);
-	mutex_unlock(&adev->mman.gtt_window_lock);
+	mutex_unlock(&adev->mman.move_entity.gart_window_lock);
+	mutex_unlock(&adev->mman.clear_entity.gart_window_lock);
 
 	dma_fence_wait(fence, false);
 	dma_fence_put(fence);
@@ -707,7 +709,8 @@ void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 	return;
 
 error_alloc:
-	mutex_unlock(&adev->mman.gtt_window_lock);
+	mutex_unlock(&adev->mman.move_entity.gart_window_lock);
+	mutex_unlock(&adev->mman.clear_entity.gart_window_lock);
 	dev_err(adev->dev, "Error flushing GPU TLB using the SDMA (%d)!\n", r);
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index c8d59ca2b3bd..7193a341689d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -291,7 +291,7 @@ static int amdgpu_ttm_map_buffer(struct drm_sched_entity *entity,
  */
 __attribute__((nonnull))
 static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
-				      struct drm_sched_entity *entity,
+				      struct amdgpu_ttm_buffer_entity *entity,
 				      const struct amdgpu_copy_mem *src,
 				      const struct amdgpu_copy_mem *dst,
 				      uint64_t size, bool tmz,
@@ -314,7 +314,7 @@ static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
 	amdgpu_res_first(src->mem, src->offset, size, &src_mm);
 	amdgpu_res_first(dst->mem, dst->offset, size, &dst_mm);
 
-	mutex_lock(&adev->mman.gtt_window_lock);
+	mutex_lock(&entity->gart_window_lock);
 	while (src_mm.remaining) {
 		uint64_t from, to, cur_size, tiling_flags;
 		uint32_t num_type, data_format, max_com, write_compress_disable;
@@ -324,15 +324,15 @@ static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
 		cur_size = min3(src_mm.size, dst_mm.size, 256ULL << 20);
 
 		/* Map src to window 0 and dst to window 1. */
-		r = amdgpu_ttm_map_buffer(entity,
+		r = amdgpu_ttm_map_buffer(&entity->base,
 					  src->bo, src->mem, &src_mm,
-					  0, ring, tmz, &cur_size, &from);
+					  entity->gart_window_id0, ring, tmz, &cur_size, &from);
 		if (r)
 			goto error;
 
-		r = amdgpu_ttm_map_buffer(entity,
+		r = amdgpu_ttm_map_buffer(&entity->base,
 					  dst->bo, dst->mem, &dst_mm,
-					  1, ring, tmz, &cur_size, &to);
+					  entity->gart_window_id1, ring, tmz, &cur_size, &to);
 		if (r)
 			goto error;
 
@@ -359,7 +359,7 @@ static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
 							     write_compress_disable));
 		}
 
-		r = amdgpu_copy_buffer(ring, entity, from, to, cur_size, resv,
+		r = amdgpu_copy_buffer(ring, &entity->base, from, to, cur_size, resv,
 				       &next, true, copy_flags);
 		if (r)
 			goto error;
@@ -371,7 +371,7 @@ static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
 		amdgpu_res_next(&dst_mm, cur_size);
 	}
 error:
-	mutex_unlock(&adev->mman.gtt_window_lock);
+	mutex_unlock(&entity->gart_window_lock);
 	*f = fence;
 	return r;
 }
@@ -401,7 +401,7 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
 	dst.offset = 0;
 
 	r = amdgpu_ttm_copy_mem_to_mem(adev,
-				       &adev->mman.move_entity.base,
+				       &adev->mman.move_entity,
 				       &src, &dst,
 				       new_mem->size,
 				       amdgpu_bo_encrypted(abo),
@@ -1893,8 +1893,6 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 	uint64_t gtt_size;
 	int r;
 
-	mutex_init(&adev->mman.gtt_window_lock);
-
 	dma_set_max_seg_size(adev->dev, UINT_MAX);
 	/* No others user of address space so set it to 0 */
 	r = ttm_device_init(&adev->mman.bdev, &amdgpu_bo_driver, adev->dev,
@@ -2207,6 +2205,15 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
 			drm_sched_entity_destroy(&adev->mman.clear_entity.base);
 			goto error_free_entity;
 		}
+
+		/* Statically assign GART windows to each entity. */
+		mutex_init(&adev->mman.default_entity.gart_window_lock);
+		adev->mman.move_entity.gart_window_id0 = 0;
+		adev->mman.move_entity.gart_window_id1 = 1;
+		mutex_init(&adev->mman.move_entity.gart_window_lock);
+		/* Clearing entity doesn't use id0 */
+		adev->mman.clear_entity.gart_window_id1 = 2;
+		mutex_init(&adev->mman.clear_entity.gart_window_lock);
 	} else {
 		drm_sched_entity_destroy(&adev->mman.default_entity.base);
 		drm_sched_entity_destroy(&adev->mman.clear_entity.base);
@@ -2371,6 +2378,7 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+	struct amdgpu_ttm_buffer_entity *entity;
 	struct amdgpu_res_cursor cursor;
 	u64 addr;
 	int r = 0;
@@ -2381,11 +2389,12 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
 	if (!fence)
 		return -EINVAL;
 
+	entity = &adev->mman.clear_entity;
 	*fence = dma_fence_get_stub();
 
 	amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
 
-	mutex_lock(&adev->mman.gtt_window_lock);
+	mutex_lock(&entity->gart_window_lock);
 	while (cursor.remaining) {
 		struct dma_fence *next = NULL;
 		u64 size;
@@ -2398,13 +2407,13 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
 		/* Never clear more than 256MiB at once to avoid timeouts */
 		size = min(cursor.size, 256ULL << 20);
 
-		r = amdgpu_ttm_map_buffer(&adev->mman.clear_entity.base,
+		r = amdgpu_ttm_map_buffer(&entity->base,
 					  &bo->tbo, bo->tbo.resource, &cursor,
-					  1, ring, false, &size, &addr);
+					  entity->gart_window_id1, ring, false, &size, &addr);
 		if (r)
 			goto err;
 
-		r = amdgpu_ttm_fill_mem(ring, &adev->mman.clear_entity.base, 0, addr, size, resv,
+		r = amdgpu_ttm_fill_mem(ring, &entity->base, 0, addr, size, resv,
 					&next, true,
 					AMDGPU_KERNEL_JOB_ID_TTM_CLEAR_BUFFER);
 		if (r)
@@ -2416,12 +2425,12 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
 		amdgpu_res_next(&cursor, size);
 	}
 err:
-	mutex_unlock(&adev->mman.gtt_window_lock);
+	mutex_unlock(&entity->gart_window_lock);
 
 	return r;
 }
 
-int amdgpu_fill_buffer(struct amdgpu_ttm_entity *entity,
+int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity,
 		       struct amdgpu_bo *bo,
 		       uint32_t src_data,
 		       struct dma_resv *resv,
@@ -2442,7 +2451,7 @@ int amdgpu_fill_buffer(struct amdgpu_ttm_entity *entity,
 
 	amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &dst);
 
-	mutex_lock(&adev->mman.gtt_window_lock);
+	mutex_lock(&entity->gart_window_lock);
 	while (dst.remaining) {
 		struct dma_fence *next;
 		uint64_t cur_size, to;
@@ -2452,7 +2461,8 @@ int amdgpu_fill_buffer(struct amdgpu_ttm_entity *entity,
 
 		r = amdgpu_ttm_map_buffer(&entity->base,
 					  &bo->tbo, bo->tbo.resource, &dst,
-					  1, ring, false, &cur_size, &to);
+					  entity->gart_window_id1, ring, false,
+					  &cur_size, &to);
 		if (r)
 			goto error;
 
@@ -2468,7 +2478,7 @@ int amdgpu_fill_buffer(struct amdgpu_ttm_entity *entity,
 		amdgpu_res_next(&dst, cur_size);
 	}
 error:
-	mutex_unlock(&adev->mman.gtt_window_lock);
+	mutex_unlock(&entity->gart_window_lock);
 	if (f)
 		*f = dma_fence_get(fence);
 	dma_fence_put(fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index e1655f86a016..f4f762be9fdd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -39,7 +39,7 @@
 #define __AMDGPU_PL_NUM	(TTM_PL_PRIV + 6)
 
 #define AMDGPU_GTT_MAX_TRANSFER_SIZE	512
-#define AMDGPU_GTT_NUM_TRANSFER_WINDOWS	2
+#define AMDGPU_GTT_NUM_TRANSFER_WINDOWS	3
 
 extern const struct attribute_group amdgpu_vram_mgr_attr_group;
 extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
@@ -54,6 +54,9 @@ struct amdgpu_gtt_mgr {
 
 struct amdgpu_ttm_buffer_entity {
 	struct drm_sched_entity base;
+	struct mutex		gart_window_lock;
+	u32			gart_window_id0;
+	u32			gart_window_id1;
 };
 
 struct amdgpu_mman {
@@ -69,7 +72,7 @@ struct amdgpu_mman {
 
 	struct mutex				gtt_window_lock;
 
-	struct amdgpu_ttm_buffer_entity default_entity;
+	struct amdgpu_ttm_buffer_entity default_entity; /* has no gart windows */
 	struct amdgpu_ttm_buffer_entity clear_entity;
 	struct amdgpu_ttm_buffer_entity move_entity;
 
@@ -177,7 +180,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
 int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
 			    struct dma_resv *resv,
 			    struct dma_fence **fence);
-int amdgpu_fill_buffer(struct amdgpu_ttm_entity *entity,
+int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity,
 		       struct amdgpu_bo *bo,
 		       uint32_t src_data,
 		       struct dma_resv *resv,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 09756132fa1b..bc47fc362a17 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -60,7 +60,7 @@ svm_migrate_gart_map(struct amdgpu_ring *ring,
 	int r;
 
 	/* use gart window 0 */
-	*gart_addr = adev->gmc.gart_start;
+	*gart_addr = entity->gart_window_id0;
 
 	num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
 	num_bytes = npages * 8;
@@ -116,7 +116,7 @@ svm_migrate_gart_map(struct amdgpu_ring *ring,
  * multiple GTT_MAX_PAGES transfer, all sdma operations are serialized, wait for
  * the last sdma finish fence which is returned to check copy memory is done.
  *
- * Context: Process context, takes and releases gtt_window_lock
+ * Context: Process context, takes and releases gart_window_lock
  *
  * Return:
  * 0 - OK, otherwise error code
@@ -138,7 +138,7 @@ svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys,
 
 	entity = &adev->mman.move_entity;
 
-	mutex_lock(&adev->mman.gtt_window_lock);
+	mutex_lock(&entity->gart_window_lock);
 
 	while (npages) {
 		size = min(GTT_MAX_PAGES, npages);
@@ -175,7 +175,7 @@ svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys,
 	}
 
 out_unlock:
-	mutex_unlock(&adev->mman.gtt_window_lock);
+	mutex_unlock(&entity->gart_window_lock);
 
 	return r;
 }
-- 
2.43.0

next prev parent reply	other threads:[~2025-11-13 16:09 UTC|newest]

Thread overview: 53+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-11-13 16:05 [PATCH v2 00/20] drm/amdgpu: use all SDMA instances for TTM clears and moves Pierre-Eric Pelloux-Prayer
2025-11-13 16:05 ` [PATCH v2 01/20] drm/amdgpu: give each kernel job a unique id Pierre-Eric Pelloux-Prayer
2025-11-14 12:26   ` Christian König
2025-11-14 14:36     ` Pierre-Eric Pelloux-Prayer
2025-11-14 14:57       ` Christian König
2025-11-13 16:05 ` [PATCH v2 02/20] drm/ttm: rework pipelined eviction fence handling Pierre-Eric Pelloux-Prayer
2025-11-14 12:47   ` Christian König
2025-11-18 15:00   ` Thomas Hellström
2025-11-19 14:57     ` Christian König
2025-11-13 16:05 ` [PATCH v2 03/20] drm/amdgpu: remove direct_submit arg from amdgpu_copy_buffer Pierre-Eric Pelloux-Prayer
2025-11-14 12:48   ` Christian König
2025-11-13 16:05 ` [PATCH v2 04/20] drm/amdgpu: introduce amdgpu_ttm_buffer_entity Pierre-Eric Pelloux-Prayer
2025-11-14 12:57   ` Christian König
2025-11-14 20:18     ` Felix Kuehling
2025-11-13 16:05 ` [PATCH v2 05/20] drm/amdgpu: pass the entity to use to ttm functions Pierre-Eric Pelloux-Prayer
2025-11-14 13:07   ` Christian König
2025-11-14 14:41     ` Pierre-Eric Pelloux-Prayer
2025-11-17  9:41       ` Pierre-Eric Pelloux-Prayer
2025-11-14 20:20   ` Felix Kuehling
2025-11-13 16:05 ` Pierre-Eric Pelloux-Prayer [this message]
2025-11-14 15:15   ` [PATCH v2 06/20] drm/amdgpu: statically assign gart windows to ttm entities Christian König
2025-11-14 20:24   ` Felix Kuehling
2025-11-19  9:55     ` Pierre-Eric Pelloux-Prayer
2025-11-13 16:05 ` [PATCH v2 07/20] drm/amdgpu: allocate multiple clear entities Pierre-Eric Pelloux-Prayer
2025-11-17  8:41   ` Christian König
2025-11-13 16:05 ` [PATCH v2 08/20] drm/amdgpu: allocate multiple move entities Pierre-Eric Pelloux-Prayer
2025-11-14 20:57   ` Felix Kuehling
2025-11-13 16:05 ` [PATCH v2 09/20] drm/amdgpu: pass optional dependency to amdgpu_fill_buffer Pierre-Eric Pelloux-Prayer
2025-11-17  8:43   ` Christian König
2025-11-13 16:05 ` [PATCH v2 10/20] drm/admgpu: handle resv dependencies in amdgpu_ttm_map_buffer Pierre-Eric Pelloux-Prayer
2025-11-17  8:44   ` Christian König
2025-11-19  8:28     ` Pierre-Eric Pelloux-Prayer
2025-11-13 16:05 ` [PATCH v2 11/20] drm/amdgpu: round robin through clear_entities in amdgpu_fill_buffer Pierre-Eric Pelloux-Prayer
2025-11-17  8:47   ` Christian König
2025-11-13 16:05 ` [PATCH v2 12/20] drm/amdgpu: use TTM_NUM_MOVE_FENCES when reserving fences Pierre-Eric Pelloux-Prayer
2025-11-14 20:57   ` Felix Kuehling
2025-11-17  9:07   ` Christian König
2025-11-13 16:05 ` [PATCH v2 13/20] drm/amdgpu: use multiple entities in amdgpu_move_blit Pierre-Eric Pelloux-Prayer
2025-11-17  9:12   ` Christian König
2025-11-13 16:05 ` [PATCH v2 14/20] drm/amdgpu: introduce amdgpu_sdma_set_vm_pte_scheds Pierre-Eric Pelloux-Prayer
2025-11-17  9:30   ` Christian König
2025-11-13 16:05 ` [PATCH v2 15/20] drm/amdgpu: pass all the sdma scheds to amdgpu_mman Pierre-Eric Pelloux-Prayer
2025-11-14 21:23   ` Felix Kuehling
2025-11-17  9:46   ` Christian König
2025-11-19  9:34     ` Pierre-Eric Pelloux-Prayer
2025-11-19 10:49       ` Christian König
2025-11-13 16:05 ` [PATCH v2 16/20] drm/amdgpu: give ttm entities access to all the sdma scheds Pierre-Eric Pelloux-Prayer
2025-11-17  9:54   ` Christian König
2025-11-13 16:05 ` [PATCH v2 17/20] drm/amdgpu: get rid of amdgpu_ttm_clear_buffer Pierre-Eric Pelloux-Prayer
2025-11-13 16:05 ` [PATCH v2 18/20] drm/amdgpu: rename amdgpu_fill_buffer as amdgpu_ttm_clear_buffer Pierre-Eric Pelloux-Prayer
2025-11-17  9:56   ` Christian König
2025-11-13 16:06 ` [PATCH v2 19/20] drm/amdgpu: use larger gart window when possible Pierre-Eric Pelloux-Prayer
2025-11-13 16:06 ` [PATCH v2 20/20] drm/amdgpu: double AMDGPU_GTT_MAX_TRANSFER_SIZE Pierre-Eric Pelloux-Prayer

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:8e2d41c9c27 dfblob:2a444d02cf4 dfblob:c8d59ca2b3b
dfblob:7193a341689 dfblob:e1655f86a01 dfblob:f4f762be9fd
dfblob:09756132fa1 dfblob:bc47fc362a1 )
 OR (
bs:"[PATCH v2 06/20] drm/amdgpu: statically assign gart windows to ttm entities" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251113160632.5889-7-pierre-eric.pelloux-prayer@amd.com \
    --to=pierre-eric.pelloux-prayer@amd.com \
    --cc=Felix.Kuehling@amd.com \
    --cc=airlied@gmail.com \
    --cc=alexander.deucher@amd.com \
    --cc=amd-gfx@lists.freedesktop.org \
    --cc=christian.koenig@amd.com \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=simona@ffwll.ch \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox