From: Philip Yang <Philip.Yang@amd.com>
To: <amd-gfx@lists.freedesktop.org>
Cc: <Felix.Kuehling@amd.com>, <christian.koenig@amd.com>,
<david.yatsin@amd.com>, <pierre-eric.pelloux-prayer@amd.com>,
<kent.russell@amd.com>, Philip Yang <Philip.Yang@amd.com>
Subject: [PATCH v5 6/6] drm/amdkfd: Map VRAM MQD on GART
Date: Tue, 9 Dec 2025 18:43:28 -0500 [thread overview]
Message-ID: <20251209234328.625670-7-Philip.Yang@amd.com> (raw)
In-Reply-To: <20251209234328.625670-1-Philip.Yang@amd.com>
An MQD BO on VRAM accessed via the FB aperture uses mtype UC (uncached); map
it on GART with mtype RW (cached) instead, to reduce queue switch latency.
Add a GART mm_node to the kfd mem obj so that the GART entries can be freed
when the MQD mem obj is freed.
Use a resource cursor to walk the VRAM resource, which may span multiple
blocks, and use cursor_gart to walk the GART entries.
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 89 +++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 4 +-
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c | 2 +
.../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 9 ++
drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 +
5 files changed, 104 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 4f8bc7f35cdc..ae4f60aeed14 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -880,6 +880,62 @@ static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev,
}
}
+static void amdgpu_ttm_gart_bind_gfx9_mqd_vram(struct amdgpu_device *adev,
+ struct ttm_buffer_object *tbo,
+ struct drm_mm_node *mm_node,
+ uint64_t flags)
+{
+ uint64_t total_pages;
+ int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp);
+ uint64_t page_idx, pages_per_xcc;
+ struct amdgpu_res_cursor cursor_gart;
+ struct amdgpu_res_cursor cursor;
+ uint64_t ctrl_flags = flags;
+ int i;
+
+ total_pages = tbo->resource->size >> PAGE_SHIFT;
+
+ amdgpu_gmc_get_vm_pte(adev, NULL, NULL, AMDGPU_VM_MTYPE_NC, &ctrl_flags);
+
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 3))
+ amdgpu_gmc_get_vm_pte(adev, NULL, NULL, AMDGPU_VM_MTYPE_RW, &flags);
+
+ pages_per_xcc = total_pages;
+ do_div(pages_per_xcc, num_xcc);
+
+ amdgpu_res_first(NULL, mm_node->start, total_pages, &cursor_gart);
+ amdgpu_res_first(tbo->resource, 0, tbo->resource->size, &cursor);
+
+ for (i = 0, page_idx = 0; i < num_xcc; i++, page_idx += pages_per_xcc) {
+ u64 start_page;
+ u64 npages, n;
+ u64 pa;
+
+ start_page = cursor_gart.start;
+ pa = cursor.start + adev->vm_manager.vram_base_offset;
+ n = 1;
+ amdgpu_gart_map_vram_range(adev, pa, start_page, n,
+ flags, NULL);
+
+ npages = pages_per_xcc - 1;
+ while (npages) {
+ amdgpu_res_next(&cursor_gart, n);
+ amdgpu_res_next(&cursor, n * PAGE_SIZE);
+
+ start_page = cursor_gart.start;
+ pa = cursor.start + adev->vm_manager.vram_base_offset;
+ n = min3(cursor.size / PAGE_SIZE, cursor_gart.size, npages);
+
+ amdgpu_gart_map_vram_range(adev, pa, start_page, n,
+ ctrl_flags, NULL);
+
+ npages -= n;
+ }
+ amdgpu_res_next(&cursor_gart, n);
+ amdgpu_res_next(&cursor, n * PAGE_SIZE);
+ }
+}
+
static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
struct ttm_buffer_object *tbo,
uint64_t flags)
@@ -1017,6 +1073,39 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
return 0;
}
+/*
+ * amdgpu_ttm_alloc_gart_vram_bo - Bind VRAM pages to GART mapping
+ *
+ * call amdgpu_gtt_mgr_alloc_entries to alloc GART entries dynamically
+ */
+int amdgpu_ttm_alloc_gart_vram_bo(struct amdgpu_bo *abo,
+ struct drm_mm_node *mm_node,
+ u64 *gpu_addr)
+{
+ struct ttm_buffer_object *bo = &abo->tbo;
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
+ uint64_t flags;
+ int r;
+
+ /* Only for valid VRAM bo resource */
+ if (bo->resource->start == AMDGPU_BO_INVALID_OFFSET)
+ return 0;
+
+ r = amdgpu_gtt_mgr_alloc_entries(&adev->mman.gtt_mgr, mm_node,
+ amdgpu_bo_ngpu_pages(abo),
+ 0, 0, 0);
+ if (r)
+ return r;
+
+ /* compute PTE flags for this buffer object */
+ flags = amdgpu_ttm_tt_pte_flags(adev, NULL, bo->resource);
+ amdgpu_ttm_gart_bind_gfx9_mqd_vram(adev, bo, mm_node, flags);
+ amdgpu_gart_invalidate_tlb(adev);
+
+ *gpu_addr = mm_node->start << PAGE_SHIFT;
+ return 0;
+}
+
/*
* amdgpu_ttm_recover_gart - Rebind GTT pages
*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 28511e66d364..a8b8a541e21b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -140,7 +140,6 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev);
bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_resource *mem);
void amdgpu_gtt_mgr_recover(struct amdgpu_gtt_mgr *mgr);
int amdgpu_gtt_mgr_alloc_entries(struct amdgpu_gtt_mgr *mgr,
struct drm_mm_node *node,
u64 num_pages, u64 alignment,
@@ -192,6 +191,9 @@ int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity,
u64 k_job_id);
int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
+int amdgpu_ttm_alloc_gart_vram_bo(struct amdgpu_bo *abo,
+ struct drm_mm_node *mm_node,
+ u64 *gpu_addr);
void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
index f78b249e1a41..edb72f4ef82d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
@@ -225,6 +225,8 @@ void kfd_free_mqd_cp(struct mqd_manager *mm, void *mqd,
struct kfd_mem_obj *mqd_mem_obj)
{
if (mqd_mem_obj->mem) {
+ amdgpu_gtt_mgr_free_entries(&mm->dev->adev->mman.gtt_mgr,
+ &mqd_mem_obj->mm_node);
amdgpu_amdkfd_free_kernel_mem(mm->dev->adev, &mqd_mem_obj->mem);
kfree(mqd_mem_obj);
} else {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index 14123e1a9716..5828220056bd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -148,6 +148,15 @@ static struct kfd_mem_obj *allocate_mqd(struct kfd_node *node,
kfree(mqd_mem_obj);
return NULL;
}
+
+ retval = amdgpu_ttm_alloc_gart_vram_bo(mqd_mem_obj->mem,
+ &mqd_mem_obj->mm_node,
+ &(mqd_mem_obj->gpu_addr));
+ if (retval) {
+ amdgpu_amdkfd_free_kernel_mem(node->adev, &(mqd_mem_obj->mem));
+ kfree(mqd_mem_obj);
+ return NULL;
+ }
} else {
retval = kfd_gtt_sa_allocate(node, sizeof(struct v9_mqd),
&mqd_mem_obj);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 29419b3249cf..fdde907836fb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -252,6 +252,7 @@ struct kfd_mem_obj {
uint64_t gpu_addr;
uint32_t *cpu_ptr;
void *mem;
+ struct drm_mm_node mm_node;
};
struct kfd_vmid_info {
--
2.50.1
next prev parent reply other threads:[~2025-12-09 23:44 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-12-09 23:43 [PATCH v5 0/6] drm/amdkfd: Move gfx9 MQD to HBM Philip Yang
2025-12-09 23:43 ` [PATCH v5 1/6] drm/amdgpu: Fix gfx9 update PTE mtype flag Philip Yang
2025-12-09 23:43 ` [PATCH v5 2/6] drm/amdkfd: Bind MQD in GART with mtype RW Philip Yang
2025-12-09 23:43 ` [PATCH v5 3/6] drm/amdkfd: Add domain parameter to alloc kernel BO Philip Yang
2025-12-09 23:43 ` [PATCH v5 4/6] drm/amdkfd: Move gfx9 MQD to VRAM domain Philip Yang
2025-12-09 23:43 ` [PATCH v5 5/6] drm/amdgpu: Add helper to alloc GART entries Philip Yang
2025-12-10 12:57 ` Pierre-Eric Pelloux-Prayer
2025-12-10 14:05 ` Philip Yang
2025-12-15 15:14 ` Christian König
2025-12-15 15:50 ` Philip Yang
2025-12-09 23:43 ` Philip Yang [this message]
2025-12-15 15:20 ` [PATCH v5 6/6] drm/amdkfd: Map VRAM MQD on GART Christian König
2025-12-15 16:35 ` Philip Yang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251209234328.625670-7-Philip.Yang@amd.com \
--to=philip.yang@amd.com \
--cc=Felix.Kuehling@amd.com \
--cc=amd-gfx@lists.freedesktop.org \
--cc=christian.koenig@amd.com \
--cc=david.yatsin@amd.com \
--cc=kent.russell@amd.com \
--cc=pierre-eric.pelloux-prayer@amd.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox