* [PATCH v5 0/6] drm/amdkfd: Move gfx9 MQD to HBM
@ 2025-12-09 23:43 Philip Yang
2025-12-09 23:43 ` [PATCH v5 1/6] drm/amdgpu: Fix gfx9 update PTE mtype flag Philip Yang
` (5 more replies)
0 siblings, 6 replies; 13+ messages in thread
From: Philip Yang @ 2025-12-09 23:43 UTC (permalink / raw)
To: amd-gfx
Cc: Felix.Kuehling, christian.koenig, david.yatsin,
pierre-eric.pelloux-prayer, kent.russell, Philip Yang
To reduce the latency of switching between multiple queues, move the user
queue MQDs to HBM, and map them on GART with mtype RW.
v5:
- patch 1 update commit message
- patch 5 separate GART alloc helpers
- patch 6 use resource cursor to handle GART entries
v4:
- patch 1 remove the executable bit change, cc stable
- patch 5 move GART helper functions to amdgpu_gtt_mgr.c
- add patch 6 to update MQD GART mapping using resource cursor
v3:
- add patch 1 to fix gfx9 mtype update bug
- patch 2 use ASIC specific mtype
- patch 5 use drm mm to alloc GART entries and store in mqd obj
v2:
- patch 4 GART mapping use MC address, vram_base_offset + physical address
Philip Yang (6):
drm/amdgpu: Fix gfx9 update PTE mtype flag
drm/amdkfd: Bind MQD in GART with mtype RW
drm/amdkfd: Add domain parameter to alloc kernel BO
drm/amdkfd: Move gfx9 MQD to VRAM domain
drm/amdgpu: Add helper to alloc GART entries
drm/amdkfd: Map VRAM MQD on GART
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 13 +--
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 6 +-
.../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 2 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c | 27 ++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 96 ++++++++++++++++++-
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 11 ++-
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 3 -
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 8 +-
drivers/gpu/drm/amd/amdkfd/kfd_debug.c | 3 +-
drivers/gpu/drm/amd/amdkfd/kfd_device.c | 13 +--
.../drm/amd/amdkfd/kfd_device_queue_manager.c | 7 +-
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c | 16 ++--
.../drm/amd/amdkfd/kfd_mqd_manager_v12_1.c | 4 +-
.../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 18 +++-
drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 3 +-
drivers/gpu/drm/amd/amdkfd/kfd_process.c | 2 +-
.../amd/amdkfd/kfd_process_queue_manager.c | 12 ++-
17 files changed, 195 insertions(+), 49 deletions(-)
--
2.50.1
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH v5 1/6] drm/amdgpu: Fix gfx9 update PTE mtype flag
2025-12-09 23:43 [PATCH v5 0/6] drm/amdkfd: Move gfx9 MQD to HBM Philip Yang
@ 2025-12-09 23:43 ` Philip Yang
2025-12-09 23:43 ` [PATCH v5 2/6] drm/amdkfd: Bind MQD in GART with mtype RW Philip Yang
` (4 subsequent siblings)
5 siblings, 0 replies; 13+ messages in thread
From: Philip Yang @ 2025-12-09 23:43 UTC (permalink / raw)
To: amd-gfx
Cc: Felix.Kuehling, christian.koenig, david.yatsin,
pierre-eric.pelloux-prayer, kent.russell, Philip Yang, stable
Fix a copy&paste error: this should have been an assignment instead of an
OR, otherwise MTYPE_UC 0x3 cannot be updated to MTYPE_RW 0x1.
Cc stable.
cc: stable@vger.kernel.org
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
---
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 97a04e3171f2..205c34eb8d11 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1204,16 +1204,16 @@ static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,
*flags = AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_NC);
break;
case AMDGPU_VM_MTYPE_WC:
- *flags |= AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_WC);
+ *flags = AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_WC);
break;
case AMDGPU_VM_MTYPE_RW:
- *flags |= AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_RW);
+ *flags = AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_RW);
break;
case AMDGPU_VM_MTYPE_CC:
- *flags |= AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_CC);
+ *flags = AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_CC);
break;
case AMDGPU_VM_MTYPE_UC:
- *flags |= AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_UC);
+ *flags = AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_UC);
break;
}
--
2.50.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH v5 2/6] drm/amdkfd: Bind MQD in GART with mtype RW
2025-12-09 23:43 [PATCH v5 0/6] drm/amdkfd: Move gfx9 MQD to HBM Philip Yang
2025-12-09 23:43 ` [PATCH v5 1/6] drm/amdgpu: Fix gfx9 update PTE mtype flag Philip Yang
@ 2025-12-09 23:43 ` Philip Yang
2025-12-09 23:43 ` [PATCH v5 3/6] drm/amdkfd: Add domain parameter to alloc kernel BO Philip Yang
` (3 subsequent siblings)
5 siblings, 0 replies; 13+ messages in thread
From: Philip Yang @ 2025-12-09 23:43 UTC (permalink / raw)
To: amd-gfx
Cc: Felix.Kuehling, christian.koenig, david.yatsin,
pierre-eric.pelloux-prayer, kent.russell, Philip Yang
For gfx versions >= 9.4.3, bind the MQD in GART with mtype RW to enable
caching and reduce queue switch latency.
Remove the redundant mtype definition in amdgpu_vm.h.
Call amdgpu_gmc_get_vm_pte with AMDGPU_VM_MTYPE_* to get the ASIC-specific
mtype.
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 7 ++++++-
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 3 ---
2 files changed, 6 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 0f83d140b6ae..4f8bc7f35cdc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -853,7 +853,12 @@ static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev,
int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp);
uint64_t page_idx, pages_per_xcc;
int i;
- uint64_t ctrl_flags = AMDGPU_PTE_MTYPE_VG10(flags, AMDGPU_MTYPE_NC);
+ uint64_t ctrl_flags = flags;
+
+ amdgpu_gmc_get_vm_pte(adev, NULL, NULL, AMDGPU_VM_MTYPE_NC, &ctrl_flags);
+
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 3))
+ amdgpu_gmc_get_vm_pte(adev, NULL, NULL, AMDGPU_VM_MTYPE_RW, &flags);
pages_per_xcc = total_pages;
do_div(pages_per_xcc, num_xcc);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 139642eacdd0..e34c8f854b2f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -100,9 +100,6 @@ struct amdgpu_bo_vm;
(((uint64_t)(flags) & (~AMDGPU_PTE_MTYPE_VG10_MASK)) | \
AMDGPU_PTE_MTYPE_VG10_SHIFT(mtype))
-#define AMDGPU_MTYPE_NC 0
-#define AMDGPU_MTYPE_CC 2
-
#define AMDGPU_PTE_DEFAULT_ATC (AMDGPU_PTE_SYSTEM \
| AMDGPU_PTE_SNOOPED \
| AMDGPU_PTE_EXECUTABLE \
--
2.50.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH v5 3/6] drm/amdkfd: Add domain parameter to alloc kernel BO
2025-12-09 23:43 [PATCH v5 0/6] drm/amdkfd: Move gfx9 MQD to HBM Philip Yang
2025-12-09 23:43 ` [PATCH v5 1/6] drm/amdgpu: Fix gfx9 update PTE mtype flag Philip Yang
2025-12-09 23:43 ` [PATCH v5 2/6] drm/amdkfd: Bind MQD in GART with mtype RW Philip Yang
@ 2025-12-09 23:43 ` Philip Yang
2025-12-09 23:43 ` [PATCH v5 4/6] drm/amdkfd: Move gfx9 MQD to VRAM domain Philip Yang
` (2 subsequent siblings)
5 siblings, 0 replies; 13+ messages in thread
From: Philip Yang @ 2025-12-09 23:43 UTC (permalink / raw)
To: amd-gfx
Cc: Felix.Kuehling, christian.koenig, david.yatsin,
pierre-eric.pelloux-prayer, kent.russell, Philip Yang
Add a domain parameter so the following patch can allocate the kernel BO
for the MQD from the VRAM domain. No functional change, because kernel BOs
are currently all allocated from the GTT domain.
Rename amdgpu_amdkfd_alloc_gtt_mem to amdgpu_amdkfd_alloc_kernel_mem
Rename amdgpu_amdkfd_free_gtt_mem to amdgpu_amdkfd_free_kernel_mem
Rename the kfd_mem_obj member gtt_mem to mem
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Kent Russell <kent.russell@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 10 +++++-----
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 6 +++---
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 2 +-
drivers/gpu/drm/amd/amdkfd/kfd_debug.c | 3 ++-
drivers/gpu/drm/amd/amdkfd/kfd_device.c | 13 +++++++------
.../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 7 ++++---
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c | 14 +++++++-------
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12_1.c | 4 ++--
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 9 +++++----
drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 +-
drivers/gpu/drm/amd/amdkfd/kfd_process.c | 2 +-
.../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 12 +++++++-----
12 files changed, 45 insertions(+), 39 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index a2879d2b7c8e..090d17911bc4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -316,8 +316,8 @@ void amdgpu_amdkfd_gpu_reset(struct amdgpu_device *adev)
&adev->kfd.reset_work);
}
-int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size,
- void **mem_obj, uint64_t *gpu_addr,
+int amdgpu_amdkfd_alloc_kernel_mem(struct amdgpu_device *adev, size_t size,
+ u32 domain, void **mem_obj, uint64_t *gpu_addr,
void **cpu_ptr, bool cp_mqd_gfx9)
{
struct amdgpu_bo *bo = NULL;
@@ -328,7 +328,7 @@ int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size,
memset(&bp, 0, sizeof(bp));
bp.size = size;
bp.byte_align = PAGE_SIZE;
- bp.domain = AMDGPU_GEM_DOMAIN_GTT;
+ bp.domain = domain;
bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
bp.type = ttm_bo_type_kernel;
bp.resv = NULL;
@@ -351,7 +351,7 @@ int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size,
goto allocate_mem_reserve_bo_failed;
}
- r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
+ r = amdgpu_bo_pin(bo, domain);
if (r) {
dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r);
goto allocate_mem_pin_bo_failed;
@@ -388,7 +388,7 @@ int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size,
return r;
}
-void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void **mem_obj)
+void amdgpu_amdkfd_free_kernel_mem(struct amdgpu_device *adev, void **mem_obj)
{
struct amdgpu_bo **bo = (struct amdgpu_bo **) mem_obj;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index da4575676335..274a99eb6d44 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -240,10 +240,10 @@ int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo,
}
#endif
/* Shared API */
-int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size,
- void **mem_obj, uint64_t *gpu_addr,
+int amdgpu_amdkfd_alloc_kernel_mem(struct amdgpu_device *adev, size_t size,
+ u32 domain, void **mem_obj, uint64_t *gpu_addr,
void **cpu_ptr, bool mqd_gfx9);
-void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void **mem_obj);
+void amdgpu_amdkfd_free_kernel_mem(struct amdgpu_device *adev, void **mem_obj);
int amdgpu_amdkfd_alloc_gws(struct amdgpu_device *adev, size_t size,
void **mem_obj);
void amdgpu_amdkfd_free_gws(struct amdgpu_device *adev, void *mem_obj);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 6585b8b9e709..b029a3e218b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -2214,7 +2214,7 @@ int amdgpu_amdkfd_gpuvm_sync_memory(
* @bo_gart: Return bo reference
*
* Before return, bo reference count is incremented. To release the reference and unpin/
- * unmap the BO, call amdgpu_amdkfd_free_gtt_mem.
+ * unmap the BO, call amdgpu_amdkfd_free_kernel_mem.
*/
int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_bo *bo, struct amdgpu_bo **bo_gart)
{
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
index ba9a09b6589a..63b870918f93 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
@@ -357,8 +357,9 @@ int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd, bool sq_trap_en)
return 0;
if (!pdd->proc_ctx_cpu_ptr) {
- r = amdgpu_amdkfd_alloc_gtt_mem(adev,
+ r = amdgpu_amdkfd_alloc_kernel_mem(adev,
AMDGPU_MES_PROC_CTX_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
&pdd->proc_ctx_bo,
&pdd->proc_ctx_gpu_addr,
&pdd->proc_ctx_cpu_ptr,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index cf640c9d19bf..0f824bd08fe1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -808,12 +808,13 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
/* add another 512KB for all other allocations on gart (HPD, fences) */
size += 512 * 1024;
- if (amdgpu_amdkfd_alloc_gtt_mem(
- kfd->adev, size, &kfd->gtt_mem,
+ if (amdgpu_amdkfd_alloc_kernel_mem(
+ kfd->adev, size, AMDGPU_GEM_DOMAIN_GTT,
+ &kfd->gtt_mem,
&kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr,
false)) {
dev_err(kfd_device, "Could not allocate %d bytes\n", size);
- goto alloc_gtt_mem_failure;
+ goto alloc_kernel_mem_failure;
}
dev_info(kfd_device, "Allocated %d bytes on gart\n", size);
@@ -937,8 +938,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
kfd_doorbell_error:
kfd_gtt_sa_fini(kfd);
kfd_gtt_sa_init_error:
- amdgpu_amdkfd_free_gtt_mem(kfd->adev, &kfd->gtt_mem);
-alloc_gtt_mem_failure:
+ amdgpu_amdkfd_free_kernel_mem(kfd->adev, &kfd->gtt_mem);
+alloc_kernel_mem_failure:
dev_err(kfd_device,
"device %x:%x NOT added due to errors\n",
kfd->adev->pdev->vendor, kfd->adev->pdev->device);
@@ -955,7 +956,7 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
kfd_doorbell_fini(kfd);
ida_destroy(&kfd->doorbell_ida);
kfd_gtt_sa_fini(kfd);
- amdgpu_amdkfd_free_gtt_mem(kfd->adev, &kfd->gtt_mem);
+ amdgpu_amdkfd_free_kernel_mem(kfd->adev, &kfd->gtt_mem);
}
kfree(kfd);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 39800280543c..4ab51ae64724 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -2905,8 +2905,9 @@ static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm)
(dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size *
NUM_XCC(dqm->dev->xcc_mask));
- retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, size,
- &(mem_obj->gtt_mem), &(mem_obj->gpu_addr),
+ retval = amdgpu_amdkfd_alloc_kernel_mem(dev->adev, size,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &(mem_obj->mem), &(mem_obj->gpu_addr),
(void *)&(mem_obj->cpu_ptr), false);
return retval;
@@ -3047,7 +3048,7 @@ static void deallocate_hiq_sdma_mqd(struct kfd_node *dev,
{
WARN(!mqd, "No hiq sdma mqd trunk to free");
- amdgpu_amdkfd_free_gtt_mem(dev->adev, &mqd->gtt_mem);
+ amdgpu_amdkfd_free_kernel_mem(dev->adev, &mqd->mem);
}
void device_queue_manager_uninit(struct device_queue_manager *dqm)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
index d9ae854b6908..f78b249e1a41 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
@@ -54,7 +54,7 @@ struct kfd_mem_obj *allocate_hiq_mqd(struct kfd_node *dev, struct queue_properti
if (!mqd_mem_obj)
return NULL;
- mqd_mem_obj->gtt_mem = dev->dqm->hiq_sdma_mqd.gtt_mem;
+ mqd_mem_obj->mem = dev->dqm->hiq_sdma_mqd.mem;
mqd_mem_obj->gpu_addr = dev->dqm->hiq_sdma_mqd.gpu_addr;
mqd_mem_obj->cpu_ptr = dev->dqm->hiq_sdma_mqd.cpu_ptr;
@@ -79,7 +79,7 @@ struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_node *dev,
offset += dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size *
NUM_XCC(dev->xcc_mask);
- mqd_mem_obj->gtt_mem = (void *)((uint64_t)dev->dqm->hiq_sdma_mqd.gtt_mem
+ mqd_mem_obj->mem = (void *)((uint64_t)dev->dqm->hiq_sdma_mqd.mem
+ offset);
mqd_mem_obj->gpu_addr = dev->dqm->hiq_sdma_mqd.gpu_addr + offset;
mqd_mem_obj->cpu_ptr = (uint32_t *)((uint64_t)
@@ -91,7 +91,7 @@ struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_node *dev,
void free_mqd_hiq_sdma(struct mqd_manager *mm, void *mqd,
struct kfd_mem_obj *mqd_mem_obj)
{
- WARN_ON(!mqd_mem_obj->gtt_mem);
+ WARN_ON(!mqd_mem_obj->mem);
kfree(mqd_mem_obj);
}
@@ -224,8 +224,8 @@ int kfd_destroy_mqd_cp(struct mqd_manager *mm, void *mqd,
void kfd_free_mqd_cp(struct mqd_manager *mm, void *mqd,
struct kfd_mem_obj *mqd_mem_obj)
{
- if (mqd_mem_obj->gtt_mem) {
- amdgpu_amdkfd_free_gtt_mem(mm->dev->adev, &mqd_mem_obj->gtt_mem);
+ if (mqd_mem_obj->mem) {
+ amdgpu_amdkfd_free_kernel_mem(mm->dev->adev, &mqd_mem_obj->mem);
kfree(mqd_mem_obj);
} else {
kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
@@ -280,8 +280,8 @@ void kfd_get_hiq_xcc_mqd(struct kfd_node *dev, struct kfd_mem_obj *mqd_mem_obj,
offset = kfd_hiq_mqd_stride(dev) * virtual_xcc_id;
- mqd_mem_obj->gtt_mem = (virtual_xcc_id == 0) ?
- dev->dqm->hiq_sdma_mqd.gtt_mem : NULL;
+ mqd_mem_obj->mem = (virtual_xcc_id == 0) ?
+ dev->dqm->hiq_sdma_mqd.mem : NULL;
mqd_mem_obj->gpu_addr = dev->dqm->hiq_sdma_mqd.gpu_addr + offset;
mqd_mem_obj->cpu_ptr = (uint32_t *)((uintptr_t)
dev->dqm->hiq_sdma_mqd.cpu_ptr + offset);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12_1.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12_1.c
index 06ecc86fcb4c..22f51df6f174 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12_1.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12_1.c
@@ -370,8 +370,8 @@ static void get_xcc_mqd(struct kfd_mem_obj *mqd_mem_obj,
struct kfd_mem_obj *xcc_mqd_mem_obj,
uint64_t offset)
{
- xcc_mqd_mem_obj->gtt_mem = (offset == 0) ?
- mqd_mem_obj->gtt_mem : NULL;
+ xcc_mqd_mem_obj->mem = (offset == 0) ?
+ mqd_mem_obj->mem : NULL;
xcc_mqd_mem_obj->gpu_addr = mqd_mem_obj->gpu_addr + offset;
xcc_mqd_mem_obj->cpu_ptr = (uint32_t *)((uintptr_t)mqd_mem_obj->cpu_ptr
+ offset);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index 2e9b6bcf2704..d234db138182 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -135,11 +135,12 @@ static struct kfd_mem_obj *allocate_mqd(struct kfd_node *node,
mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
if (!mqd_mem_obj)
return NULL;
- retval = amdgpu_amdkfd_alloc_gtt_mem(node->adev,
+ retval = amdgpu_amdkfd_alloc_kernel_mem(node->adev,
(ALIGN(q->ctl_stack_size, PAGE_SIZE) +
ALIGN(sizeof(struct v9_mqd), PAGE_SIZE)) *
NUM_XCC(node->xcc_mask),
- &(mqd_mem_obj->gtt_mem),
+ AMDGPU_GEM_DOMAIN_GTT,
+ &(mqd_mem_obj->mem),
&(mqd_mem_obj->gpu_addr),
(void *)&(mqd_mem_obj->cpu_ptr), true);
@@ -665,8 +666,8 @@ static void get_xcc_mqd(struct kfd_mem_obj *mqd_mem_obj,
struct kfd_mem_obj *xcc_mqd_mem_obj,
uint64_t offset)
{
- xcc_mqd_mem_obj->gtt_mem = (offset == 0) ?
- mqd_mem_obj->gtt_mem : NULL;
+ xcc_mqd_mem_obj->mem = (offset == 0) ?
+ mqd_mem_obj->mem : NULL;
xcc_mqd_mem_obj->gpu_addr = mqd_mem_obj->gpu_addr + offset;
xcc_mqd_mem_obj->cpu_ptr = (uint32_t *)((uintptr_t)mqd_mem_obj->cpu_ptr
+ offset);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 12f640a9370a..29419b3249cf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -251,7 +251,7 @@ struct kfd_mem_obj {
uint32_t range_end;
uint64_t gpu_addr;
uint32_t *cpu_ptr;
- void *gtt_mem;
+ void *mem;
};
struct kfd_vmid_info {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index aec7522407db..b3d7c545f7b9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1131,7 +1131,7 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
if (pdd->dev->kfd->shared_resources.enable_mes &&
pdd->proc_ctx_cpu_ptr)
- amdgpu_amdkfd_free_gtt_mem(pdd->dev->adev,
+ amdgpu_amdkfd_free_kernel_mem(pdd->dev->adev,
&pdd->proc_ctx_bo);
/*
* before destroying pdd, make sure to report availability
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 5f8cda4733f9..232103742712 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -209,8 +209,8 @@ static void pqm_clean_queue_resource(struct process_queue_manager *pqm,
}
if (dev->kfd->shared_resources.enable_mes) {
- amdgpu_amdkfd_free_gtt_mem(dev->adev, &pqn->q->gang_ctx_bo);
- amdgpu_amdkfd_free_gtt_mem(dev->adev, (void **)&pqn->q->wptr_bo_gart);
+ amdgpu_amdkfd_free_kernel_mem(dev->adev, &pqn->q->gang_ctx_bo);
+ amdgpu_amdkfd_free_kernel_mem(dev->adev, (void **)&pqn->q->wptr_bo_gart);
}
}
@@ -264,8 +264,9 @@ static int init_user_queue(struct process_queue_manager *pqm,
(*q)->process = pqm->process;
if (dev->kfd->shared_resources.enable_mes) {
- retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev,
+ retval = amdgpu_amdkfd_alloc_kernel_mem(dev->adev,
AMDGPU_MES_GANG_CTX_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
&(*q)->gang_ctx_bo,
&(*q)->gang_ctx_gpu_addr,
&(*q)->gang_ctx_cpu_ptr,
@@ -297,7 +298,7 @@ static int init_user_queue(struct process_queue_manager *pqm,
return 0;
free_gang_ctx_bo:
- amdgpu_amdkfd_free_gtt_mem(dev->adev, &(*q)->gang_ctx_bo);
+ amdgpu_amdkfd_free_kernel_mem(dev->adev, &(*q)->gang_ctx_bo);
cleanup:
uninit_queue(*q);
*q = NULL;
@@ -367,8 +368,9 @@ int pqm_create_queue(struct process_queue_manager *pqm,
/* Allocate proc_ctx_bo only if MES is enabled and this is the first queue */
if (!pdd->proc_ctx_cpu_ptr && dev->kfd->shared_resources.enable_mes) {
- retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev,
+ retval = amdgpu_amdkfd_alloc_kernel_mem(dev->adev,
AMDGPU_MES_PROC_CTX_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
&pdd->proc_ctx_bo,
&pdd->proc_ctx_gpu_addr,
&pdd->proc_ctx_cpu_ptr,
--
2.50.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH v5 4/6] drm/amdkfd: Move gfx9 MQD to VRAM domain
2025-12-09 23:43 [PATCH v5 0/6] drm/amdkfd: Move gfx9 MQD to HBM Philip Yang
` (2 preceding siblings ...)
2025-12-09 23:43 ` [PATCH v5 3/6] drm/amdkfd: Add domain parameter to alloc kernel BO Philip Yang
@ 2025-12-09 23:43 ` Philip Yang
2025-12-09 23:43 ` [PATCH v5 5/6] drm/amdgpu: Add helper to alloc GART entries Philip Yang
2025-12-09 23:43 ` [PATCH v5 6/6] drm/amdkfd: Map VRAM MQD on GART Philip Yang
5 siblings, 0 replies; 13+ messages in thread
From: Philip Yang @ 2025-12-09 23:43 UTC (permalink / raw)
To: amd-gfx
Cc: Felix.Kuehling, christian.koenig, david.yatsin,
pierre-eric.pelloux-prayer, kent.russell, Philip Yang
To reduce queue switch latency further, move the MQD to the VRAM domain.
The CP accesses the MQD and control stack via the FB aperture, which
requires physically contiguous pages.
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 3 ++-
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 2 +-
2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 090d17911bc4..113c058cf7b5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -329,7 +329,8 @@ int amdgpu_amdkfd_alloc_kernel_mem(struct amdgpu_device *adev, size_t size,
bp.size = size;
bp.byte_align = PAGE_SIZE;
bp.domain = domain;
- bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
+ bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
+ AMDGPU_GEM_CREATE_CPU_GTT_USWC;
bp.type = ttm_bo_type_kernel;
bp.resv = NULL;
bp.bo_ptr_size = sizeof(struct amdgpu_bo);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index d234db138182..14123e1a9716 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -139,7 +139,7 @@ static struct kfd_mem_obj *allocate_mqd(struct kfd_node *node,
(ALIGN(q->ctl_stack_size, PAGE_SIZE) +
ALIGN(sizeof(struct v9_mqd), PAGE_SIZE)) *
NUM_XCC(node->xcc_mask),
- AMDGPU_GEM_DOMAIN_GTT,
+ AMDGPU_GEM_DOMAIN_VRAM,
&(mqd_mem_obj->mem),
&(mqd_mem_obj->gpu_addr),
(void *)&(mqd_mem_obj->cpu_ptr), true);
--
2.50.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH v5 5/6] drm/amdgpu: Add helper to alloc GART entries
2025-12-09 23:43 [PATCH v5 0/6] drm/amdkfd: Move gfx9 MQD to HBM Philip Yang
` (3 preceding siblings ...)
2025-12-09 23:43 ` [PATCH v5 4/6] drm/amdkfd: Move gfx9 MQD to VRAM domain Philip Yang
@ 2025-12-09 23:43 ` Philip Yang
2025-12-10 12:57 ` Pierre-Eric Pelloux-Prayer
2025-12-15 15:14 ` Christian König
2025-12-09 23:43 ` [PATCH v5 6/6] drm/amdkfd: Map VRAM MQD on GART Philip Yang
5 siblings, 2 replies; 13+ messages in thread
From: Philip Yang @ 2025-12-09 23:43 UTC (permalink / raw)
To: amd-gfx
Cc: Felix.Kuehling, christian.koenig, david.yatsin,
pierre-eric.pelloux-prayer, kent.russell, Philip Yang
Add helpers amdgpu_gtt_mgr_alloc/free_entries, exposing the configurable
drm_mm allocator parameters to the caller.
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c | 27 +++++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 7 ++++++
2 files changed, 34 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
index 895c1e4c6747..d21c7187e4aa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -321,3 +321,30 @@ void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev)
ttm_resource_manager_cleanup(man);
ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, NULL);
}
+
+int amdgpu_gtt_mgr_alloc_entries(struct amdgpu_gtt_mgr *mgr,
+ struct drm_mm_node *node,
+ u64 num_pages, u64 alignment,
+ unsigned long color,
+ enum drm_mm_insert_mode mode)
+{
+ struct amdgpu_device *adev = container_of(mgr, typeof(*adev), mman.gtt_mgr);
+ int r;
+
+ spin_lock(&mgr->lock);
+ r = drm_mm_insert_node_in_range(&mgr->mm, node, num_pages,
+ alignment, color, 0,
+ adev->gmc.gart_size >> PAGE_SHIFT,
+ mode);
+ spin_unlock(&mgr->lock);
+ return r;
+}
+
+void amdgpu_gtt_mgr_free_entries(struct amdgpu_gtt_mgr *mgr,
+ struct drm_mm_node *mm_node)
+{
+ spin_lock(&mgr->lock);
+ if (drm_mm_node_allocated(mm_node))
+ drm_mm_remove_node(mm_node);
+ spin_unlock(&mgr->lock);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 72488124aa59..28511e66d364 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -141,6 +141,13 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev);
bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_resource *mem);
void amdgpu_gtt_mgr_recover(struct amdgpu_gtt_mgr *mgr);
+int amdgpu_gtt_mgr_alloc_entries(struct amdgpu_gtt_mgr *mgr,
+ struct drm_mm_node *node,
+ u64 num_pages, u64 alignment,
+ unsigned long color,
+ enum drm_mm_insert_mode mode);
+void amdgpu_gtt_mgr_free_entries(struct amdgpu_gtt_mgr *mgr,
+ struct drm_mm_node *mm_node);
uint64_t amdgpu_preempt_mgr_usage(struct ttm_resource_manager *man);
u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo);
--
2.50.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH v5 6/6] drm/amdkfd: Map VRAM MQD on GART
2025-12-09 23:43 [PATCH v5 0/6] drm/amdkfd: Move gfx9 MQD to HBM Philip Yang
` (4 preceding siblings ...)
2025-12-09 23:43 ` [PATCH v5 5/6] drm/amdgpu: Add helper to alloc GART entries Philip Yang
@ 2025-12-09 23:43 ` Philip Yang
2025-12-15 15:20 ` Christian König
5 siblings, 1 reply; 13+ messages in thread
From: Philip Yang @ 2025-12-09 23:43 UTC (permalink / raw)
To: amd-gfx
Cc: Felix.Kuehling, christian.koenig, david.yatsin,
pierre-eric.pelloux-prayer, kent.russell, Philip Yang
Accessing the MQD BO in VRAM via the FB aperture uses mtype UC (uncached);
map it on GART with mtype RW (cached) instead, to reduce queue switch
latency.
Add a GART mm_node to the kfd mem obj so the GART entries can be freed
after the MQD mem obj is freed.
Use a resource cursor to handle the VRAM resource, which may span multiple
blocks, and use cursor_gart to walk the GART entries.
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 89 +++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 4 +-
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c | 2 +
.../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 9 ++
drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 +
5 files changed, 104 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 4f8bc7f35cdc..ae4f60aeed14 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -880,6 +880,62 @@ static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev,
}
}
+static void amdgpu_ttm_gart_bind_gfx9_mqd_vram(struct amdgpu_device *adev,
+ struct ttm_buffer_object *tbo,
+ struct drm_mm_node *mm_node,
+ uint64_t flags)
+{
+ uint64_t total_pages;
+ int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp);
+ uint64_t page_idx, pages_per_xcc;
+ struct amdgpu_res_cursor cursor_gart;
+ struct amdgpu_res_cursor cursor;
+ uint64_t ctrl_flags = flags;
+ int i;
+
+ total_pages = tbo->resource->size >> PAGE_SHIFT;
+
+ amdgpu_gmc_get_vm_pte(adev, NULL, NULL, AMDGPU_VM_MTYPE_NC, &ctrl_flags);
+
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 3))
+ amdgpu_gmc_get_vm_pte(adev, NULL, NULL, AMDGPU_VM_MTYPE_RW, &flags);
+
+ pages_per_xcc = total_pages;
+ do_div(pages_per_xcc, num_xcc);
+
+ amdgpu_res_first(NULL, mm_node->start, total_pages, &cursor_gart);
+ amdgpu_res_first(tbo->resource, 0, tbo->resource->size, &cursor);
+
+ for (i = 0, page_idx = 0; i < num_xcc; i++, page_idx += pages_per_xcc) {
+ u64 start_page;
+ u64 npages, n;
+ u64 pa;
+
+ start_page = cursor_gart.start;
+ pa = cursor.start + adev->vm_manager.vram_base_offset;
+ n = 1;
+ amdgpu_gart_map_vram_range(adev, pa, start_page, n,
+ flags, NULL);
+
+ npages = pages_per_xcc - 1;
+ while (npages) {
+ amdgpu_res_next(&cursor_gart, n);
+ amdgpu_res_next(&cursor, n * PAGE_SIZE);
+
+ start_page = cursor_gart.start;
+ pa = cursor.start + adev->vm_manager.vram_base_offset;
+ n = min3(cursor.size / PAGE_SIZE, cursor_gart.size, npages);
+
+ amdgpu_gart_map_vram_range(adev, pa, start_page, n,
+ ctrl_flags, NULL);
+
+ npages -= n;
+ }
+ amdgpu_res_next(&cursor_gart, n);
+ amdgpu_res_next(&cursor, n * PAGE_SIZE);
+ }
+}
+
static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
struct ttm_buffer_object *tbo,
uint64_t flags)
@@ -1017,6 +1073,39 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
return 0;
}
+/*
+ * amdgpu_ttm_alloc_gart_vram_bo - Bind VRAM pages to GART mapping
+ *
+ * call amdgpu_ttm_alloc_gart_entries to alloc GART dynamically
+ */
+int amdgpu_ttm_alloc_gart_vram_bo(struct amdgpu_bo *abo,
+ struct drm_mm_node *mm_node,
+ u64 *gpu_addr)
+{
+ struct ttm_buffer_object *bo = &abo->tbo;
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
+ uint64_t flags;
+ int r;
+
+ /* Only for valid VRAM bo resource */
+ if (bo->resource->start == AMDGPU_BO_INVALID_OFFSET)
+ return 0;
+
+ r = amdgpu_gtt_mgr_alloc_entries(&adev->mman.gtt_mgr, mm_node,
+ amdgpu_bo_ngpu_pages(abo),
+ 0, 0, 0);
+ if (r)
+ return r;
+
+ /* compute PTE flags for this buffer object */
+ flags = amdgpu_ttm_tt_pte_flags(adev, NULL, bo->resource);
+ amdgpu_ttm_gart_bind_gfx9_mqd_vram(adev, bo, mm_node, flags);
+ amdgpu_gart_invalidate_tlb(adev);
+
+ *gpu_addr = mm_node->start << PAGE_SHIFT;
+ return 0;
+}
+
/*
* amdgpu_ttm_recover_gart - Rebind GTT pages
*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 28511e66d364..a8b8a541e21b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -140,7 +140,6 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev);
bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_resource *mem);
void amdgpu_gtt_mgr_recover(struct amdgpu_gtt_mgr *mgr);
int amdgpu_gtt_mgr_alloc_entries(struct amdgpu_gtt_mgr *mgr,
struct drm_mm_node *node,
u64 num_pages, u64 alignment,
@@ -192,6 +191,9 @@ int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity,
u64 k_job_id);
int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
+int amdgpu_ttm_alloc_gart_vram_bo(struct amdgpu_bo *abo,
+ struct drm_mm_node *mm_node,
+ u64 *gpu_addr);
void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
index f78b249e1a41..edb72f4ef82d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
@@ -225,6 +225,8 @@ void kfd_free_mqd_cp(struct mqd_manager *mm, void *mqd,
struct kfd_mem_obj *mqd_mem_obj)
{
if (mqd_mem_obj->mem) {
+ amdgpu_gtt_mgr_free_entries(&mm->dev->adev->mman.gtt_mgr,
+ &mqd_mem_obj->mm_node);
amdgpu_amdkfd_free_kernel_mem(mm->dev->adev, &mqd_mem_obj->mem);
kfree(mqd_mem_obj);
} else {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index 14123e1a9716..5828220056bd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -148,6 +148,15 @@ static struct kfd_mem_obj *allocate_mqd(struct kfd_node *node,
kfree(mqd_mem_obj);
return NULL;
}
+
+ retval = amdgpu_ttm_alloc_gart_vram_bo(mqd_mem_obj->mem,
+ &mqd_mem_obj->mm_node,
+ &(mqd_mem_obj->gpu_addr));
+ if (retval) {
+ amdgpu_amdkfd_free_kernel_mem(node->adev, &(mqd_mem_obj->mem));
+ kfree(mqd_mem_obj);
+ return NULL;
+ }
} else {
retval = kfd_gtt_sa_allocate(node, sizeof(struct v9_mqd),
&mqd_mem_obj);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 29419b3249cf..fdde907836fb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -252,6 +252,7 @@ struct kfd_mem_obj {
uint64_t gpu_addr;
uint32_t *cpu_ptr;
void *mem;
+ struct drm_mm_node mm_node;
};
struct kfd_vmid_info {
--
2.50.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH v5 5/6] drm/amdgpu: Add helper to alloc GART entries
2025-12-09 23:43 ` [PATCH v5 5/6] drm/amdgpu: Add helper to alloc GART entries Philip Yang
@ 2025-12-10 12:57 ` Pierre-Eric Pelloux-Prayer
2025-12-10 14:05 ` Philip Yang
2025-12-15 15:14 ` Christian König
1 sibling, 1 reply; 13+ messages in thread
From: Pierre-Eric Pelloux-Prayer @ 2025-12-10 12:57 UTC (permalink / raw)
To: Philip Yang, amd-gfx
Cc: Felix.Kuehling, christian.koenig, david.yatsin,
pierre-eric.pelloux-prayer, kent.russell
Hi,
Le 10/12/2025 à 00:43, Philip Yang a écrit :
> Add helper amdgpu_gtt_mgr_alloc/free_entries, export the configurable drm_mm
> allocator parameters to caller.
>
> Signed-off-by: Philip Yang <Philip.Yang@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c | 27 +++++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 7 ++++++
> 2 files changed, 34 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
> index 895c1e4c6747..d21c7187e4aa 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
> @@ -321,3 +321,30 @@ void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev)
> ttm_resource_manager_cleanup(man);
> ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, NULL);
> }
> +
> +int amdgpu_gtt_mgr_alloc_entries(struct amdgpu_gtt_mgr *mgr,
> + struct drm_mm_node *node,
> + u64 num_pages, u64 alignment,
I would drop the alignment argument since all users are going to pass 0 for now.
> + unsigned long color,
As discussed offline, my version of these helpers also exposed the color arg to
be able to distinguish between node's with a BO (color = 0) and the ones without
a BO (color = 1). This is useful in amdgpu_gtt_mgr_recover() because for the
latter we can't do:
struct drm_range_mgr_node *node = container_of(mm_node, ...);
To avoid modifying again the same code, I'd suggest to:
1) add a define ("#define GART_ENTRY_WITHOUT_BO_COLOR 1" ?) and use it as the
color inside your helper
2) remove the color argument
3) update amdgpu_gtt_mgr_recover() to skip nodes with this color
Thanks,
Pierre-Eric
> + enum drm_mm_insert_mode mode)
> +{
> + struct amdgpu_device *adev = container_of(mgr, typeof(*adev), mman.gtt_mgr);
> + int r;
> +
> + spin_lock(&mgr->lock);
> + r = drm_mm_insert_node_in_range(&mgr->mm, node, num_pages,
> + alignment, color, 0,
> + adev->gmc.gart_size >> PAGE_SHIFT,
> + mode);
> + spin_unlock(&mgr->lock);
> + return r;
> +}
> +
> +void amdgpu_gtt_mgr_free_entries(struct amdgpu_gtt_mgr *mgr,
> + struct drm_mm_node *mm_node)
> +{
> + spin_lock(&mgr->lock);
> + if (drm_mm_node_allocated(mm_node))
> + drm_mm_remove_node(mm_node);
> + spin_unlock(&mgr->lock);
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> index 72488124aa59..28511e66d364 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> @@ -141,6 +141,13 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev);
> bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_resource *mem);
> void amdgpu_gtt_mgr_recover(struct amdgpu_gtt_mgr *mgr);
>
> +int amdgpu_gtt_mgr_alloc_entries(struct amdgpu_gtt_mgr *mgr,
> + struct drm_mm_node *node,
> + u64 num_pages, u64 alignment,
> + unsigned long color,
> + enum drm_mm_insert_mode mode);
> +void amdgpu_gtt_mgr_free_entries(struct amdgpu_gtt_mgr *mgr,
> + struct drm_mm_node *mm_node);
> uint64_t amdgpu_preempt_mgr_usage(struct ttm_resource_manager *man);
>
> u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo);
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v5 5/6] drm/amdgpu: Add helper to alloc GART entries
2025-12-10 12:57 ` Pierre-Eric Pelloux-Prayer
@ 2025-12-10 14:05 ` Philip Yang
0 siblings, 0 replies; 13+ messages in thread
From: Philip Yang @ 2025-12-10 14:05 UTC (permalink / raw)
To: Pierre-Eric Pelloux-Prayer, Philip Yang, amd-gfx
Cc: Felix.Kuehling, christian.koenig, david.yatsin,
pierre-eric.pelloux-prayer, kent.russell
On 2025-12-10 07:57, Pierre-Eric Pelloux-Prayer wrote:
> Hi,
>
> Le 10/12/2025 à 00:43, Philip Yang a écrit :
>> Add helper amdgpu_gtt_mgr_alloc/free_entries, export the configurable
>> drm_mm
>> allocator parameters to caller.
>>
>> Signed-off-by: Philip Yang <Philip.Yang@amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c | 27 +++++++++++++++++++++
>> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 7 ++++++
>> 2 files changed, 34 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
>> index 895c1e4c6747..d21c7187e4aa 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
>> @@ -321,3 +321,30 @@ void amdgpu_gtt_mgr_fini(struct amdgpu_device
>> *adev)
>> ttm_resource_manager_cleanup(man);
>> ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, NULL);
>> }
>> +
>> +int amdgpu_gtt_mgr_alloc_entries(struct amdgpu_gtt_mgr *mgr,
>> + struct drm_mm_node *node,
>> + u64 num_pages, u64 alignment,
>
> I would drop the alignment argument since all users are going to pass
> 0 for now.
ok, we only need page align for GART address
>
>> + unsigned long color,
>
> As discussed offline, my version of these helpers also exposed the
> color arg to be able to distinguish between node's with a BO (color =
> 0) and the ones without a BO (color = 1). This is useful in
> amdgpu_gtt_mgr_recover() because for the latter we can't do:
>
> struct drm_range_mgr_node *node = container_of(mm_node, ...);
>
> To avoid modifying again the same code, I'd suggest to:
> 1) add a define ("#define GART_ENTRY_WITOUT_BO_COLOR 1" ?) and use it
> as the color inside your helper
> 2) remove the color argument
> 3) update amdgpu_gtt_mgr_recover() to skip nodes with this color
I will make the changes in next version.
Thanks,
Philip
>
> Thanks,
> Pierre-Eric
>
>> + enum drm_mm_insert_mode mode)
>> +{
>> + struct amdgpu_device *adev = container_of(mgr, typeof(*adev),
>> mman.gtt_mgr);
>> + int r;
>> +
>> + spin_lock(&mgr->lock);
>> + r = drm_mm_insert_node_in_range(&mgr->mm, node, num_pages,
>> + alignment, color, 0,
>> + adev->gmc.gart_size >> PAGE_SHIFT,
>> + mode);
>> + spin_unlock(&mgr->lock);
>> + return r;
>> +}
>> +
>> +void amdgpu_gtt_mgr_free_entries(struct amdgpu_gtt_mgr *mgr,
>> + struct drm_mm_node *mm_node)
>> +{
>> + spin_lock(&mgr->lock);
>> + if (drm_mm_node_allocated(mm_node))
>> + drm_mm_remove_node(mm_node);
>> + spin_unlock(&mgr->lock);
>> +}
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> index 72488124aa59..28511e66d364 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> @@ -141,6 +141,13 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device
>> *adev);
>> bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_resource *mem);
>> void amdgpu_gtt_mgr_recover(struct amdgpu_gtt_mgr *mgr);
>> +int amdgpu_gtt_mgr_alloc_entries(struct amdgpu_gtt_mgr *mgr,
>> + struct drm_mm_node *node,
>> + u64 num_pages, u64 alignment,
>> + unsigned long color,
>> + enum drm_mm_insert_mode mode);
>> +void amdgpu_gtt_mgr_free_entries(struct amdgpu_gtt_mgr *mgr,
>> + struct drm_mm_node *mm_node);
>> uint64_t amdgpu_preempt_mgr_usage(struct ttm_resource_manager *man);
>> u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo);
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v5 5/6] drm/amdgpu: Add helper to alloc GART entries
2025-12-09 23:43 ` [PATCH v5 5/6] drm/amdgpu: Add helper to alloc GART entries Philip Yang
2025-12-10 12:57 ` Pierre-Eric Pelloux-Prayer
@ 2025-12-15 15:14 ` Christian König
2025-12-15 15:50 ` Philip Yang
1 sibling, 1 reply; 13+ messages in thread
From: Christian König @ 2025-12-15 15:14 UTC (permalink / raw)
To: Philip Yang, amd-gfx
Cc: Felix.Kuehling, david.yatsin, pierre-eric.pelloux-prayer,
kent.russell
On 12/10/25 00:43, Philip Yang wrote:
> Add helper amdgpu_gtt_mgr_alloc/free_entries, export the configurable drm_mm
> allocator parameters to caller.
>
> Signed-off-by: Philip Yang <Philip.Yang@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c | 27 +++++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 7 ++++++
> 2 files changed, 34 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
> index 895c1e4c6747..d21c7187e4aa 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
> @@ -321,3 +321,30 @@ void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev)
> ttm_resource_manager_cleanup(man);
> ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, NULL);
> }
> +
> +int amdgpu_gtt_mgr_alloc_entries(struct amdgpu_gtt_mgr *mgr,
> + struct drm_mm_node *node,
> + u64 num_pages, u64 alignment,
> + unsigned long color,
> + enum drm_mm_insert_mode mode)
The color is unused as far as I remember and the insert mode should be hardcoded, at least I don't see a good reason to expose that.
Any specific reason that was added here?
Regards,
Christian.
> +{
> + struct amdgpu_device *adev = container_of(mgr, typeof(*adev), mman.gtt_mgr);
> + int r;
> +
> + spin_lock(&mgr->lock);
> + r = drm_mm_insert_node_in_range(&mgr->mm, node, num_pages,
> + alignment, color, 0,
> + adev->gmc.gart_size >> PAGE_SHIFT,
> + mode);
> + spin_unlock(&mgr->lock);
> + return r;
> +}
> +
> +void amdgpu_gtt_mgr_free_entries(struct amdgpu_gtt_mgr *mgr,
> + struct drm_mm_node *mm_node)
> +{
> + spin_lock(&mgr->lock);
> + if (drm_mm_node_allocated(mm_node))
> + drm_mm_remove_node(mm_node);
> + spin_unlock(&mgr->lock);
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> index 72488124aa59..28511e66d364 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> @@ -141,6 +141,13 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev);
> bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_resource *mem);
> void amdgpu_gtt_mgr_recover(struct amdgpu_gtt_mgr *mgr);
>
> +int amdgpu_gtt_mgr_alloc_entries(struct amdgpu_gtt_mgr *mgr,
> + struct drm_mm_node *node,
> + u64 num_pages, u64 alignment,
> + unsigned long color,
> + enum drm_mm_insert_mode mode);
> +void amdgpu_gtt_mgr_free_entries(struct amdgpu_gtt_mgr *mgr,
> + struct drm_mm_node *mm_node);
> uint64_t amdgpu_preempt_mgr_usage(struct ttm_resource_manager *man);
>
> u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo);
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v5 6/6] drm/amdkfd: Map VRAM MQD on GART
2025-12-09 23:43 ` [PATCH v5 6/6] drm/amdkfd: Map VRAM MQD on GART Philip Yang
@ 2025-12-15 15:20 ` Christian König
2025-12-15 16:35 ` Philip Yang
0 siblings, 1 reply; 13+ messages in thread
From: Christian König @ 2025-12-15 15:20 UTC (permalink / raw)
To: Philip Yang, amd-gfx
Cc: Felix.Kuehling, david.yatsin, pierre-eric.pelloux-prayer,
kent.russell
On 12/10/25 00:43, Philip Yang wrote:
> An MQD BO on VRAM accessed via the FB aperture uses mtype UC (uncached); map
> it to GART as mtype RW (cached) to reduce queue switch latency.
>
> Add GART mm_node to kfd mem obj to free the GART entries after
> MQD mem obj is freed.
>
> Use a resource cursor to handle the VRAM resource, which may be on multiple
> blocks, and use cursor_gart to handle GART entries.
>
> Signed-off-by: Philip Yang <Philip.Yang@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 89 +++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 4 +-
> drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c | 2 +
> .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 9 ++
> drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 +
> 5 files changed, 104 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 4f8bc7f35cdc..ae4f60aeed14 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -880,6 +880,62 @@ static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev,
> }
> }
>
> +static void amdgpu_ttm_gart_bind_gfx9_mqd_vram(struct amdgpu_device *adev,
> + struct ttm_buffer_object *tbo,
> + struct drm_mm_node *mm_node,
> + uint64_t flags)
> +{
> + uint64_t total_pages;
> + int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp);
> + uint64_t page_idx, pages_per_xcc;
> + struct amdgpu_res_cursor cursor_gart;
> + struct amdgpu_res_cursor cursor;
> + uint64_t ctrl_flags = flags;
> + int i;
> +
> + total_pages = tbo->resource->size >> PAGE_SHIFT;
Please use tbo->base.size instead.
And it would be nicer if the calculation was in bytes and not pages, but not a must have.
> +
> + amdgpu_gmc_get_vm_pte(adev, NULL, NULL, AMDGPU_VM_MTYPE_NC, &ctrl_flags);
> +
> + if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 3))
> + amdgpu_gmc_get_vm_pte(adev, NULL, NULL, AMDGPU_VM_MTYPE_RW, &flags);
> +
> + pages_per_xcc = total_pages;
> + do_div(pages_per_xcc, num_xcc);
> +
> + amdgpu_res_first(NULL, mm_node->start, total_pages, &cursor_gart);
> + amdgpu_res_first(tbo->resource, 0, tbo->resource->size, &cursor);
> +
> + for (i = 0, page_idx = 0; i < num_xcc; i++, page_idx += pages_per_xcc) {
> + u64 start_page;
> + u64 npages, n;
> + u64 pa;
> +
> + start_page = cursor_gart.start;
> + pa = cursor.start + adev->vm_manager.vram_base_offset;
> + n = 1;
> + amdgpu_gart_map_vram_range(adev, pa, start_page, n,
> + flags, NULL);
> +
> + npages = pages_per_xcc - 1;
> + while (npages) {
> + amdgpu_res_next(&cursor_gart, n);
> + amdgpu_res_next(&cursor, n * PAGE_SIZE);
> +
> + start_page = cursor_gart.start;
> + pa = cursor.start + adev->vm_manager.vram_base_offset;
> + n = min3(cursor.size / PAGE_SIZE, cursor_gart.size, npages);
> +
> + amdgpu_gart_map_vram_range(adev, pa, start_page, n,
> + ctrl_flags, NULL);
> +
> + npages -= n;
> + }
> + amdgpu_res_next(&cursor_gart, n);
> + amdgpu_res_next(&cursor, n * PAGE_SIZE);
> + }
> +}
> +
> static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
> struct ttm_buffer_object *tbo,
> uint64_t flags)
> @@ -1017,6 +1073,39 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
> return 0;
> }
>
> +/*
> + * amdgpu_ttm_alloc_gart_vram_bo - Bind VRAM pages to GART mapping
> + *
> + * call amdgpu_ttm_alloc_gart_entries to alloc GART dynamically
> + */
> +int amdgpu_ttm_alloc_gart_vram_bo(struct amdgpu_bo *abo,
> + struct drm_mm_node *mm_node,
> + u64 *gpu_addr)
> +{
> + struct ttm_buffer_object *bo = &abo->tbo;
> + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
> + uint64_t flags;
> + int r;
> +
> + /* Only for valid VRAM bo resource */
> + if (bo->resource->start == AMDGPU_BO_INVALID_OFFSET)
> + return 0;
Please drop that check. We really shouldn't touch bo->resource->start any more.
Apart from that looks reasonable to me, but I'm wondering if GART re-creation after GPU recovery will still work or not.
@Pierre-Eric could you double check that?
Regards,
Christian.
> +
> + r = amdgpu_gtt_mgr_alloc_entries(&adev->mman.gtt_mgr, mm_node,
> + amdgpu_bo_ngpu_pages(abo),
> + 0, 0, 0);
> + if (r)
> + return r;
> +
> + /* compute PTE flags for this buffer object */
> + flags = amdgpu_ttm_tt_pte_flags(adev, NULL, bo->resource);
> + amdgpu_ttm_gart_bind_gfx9_mqd_vram(adev, bo, mm_node, flags);
> + amdgpu_gart_invalidate_tlb(adev);
> +
> + *gpu_addr = mm_node->start << PAGE_SHIFT;
> + return 0;
> +}
> +
> /*
> * amdgpu_ttm_recover_gart - Rebind GTT pages
> *
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> index 28511e66d364..a8b8a541e21b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> @@ -140,7 +140,6 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev);
>
> bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_resource *mem);
> void amdgpu_gtt_mgr_recover(struct amdgpu_gtt_mgr *mgr);
> int amdgpu_gtt_mgr_alloc_entries(struct amdgpu_gtt_mgr *mgr,
> struct drm_mm_node *node,
> u64 num_pages, u64 alignment,
> @@ -192,6 +191,9 @@ int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity,
> u64 k_job_id);
>
> int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
> +int amdgpu_ttm_alloc_gart_vram_bo(struct amdgpu_bo *abo,
> + struct drm_mm_node *mm_node,
> + u64 *gpu_addr);
> void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
> uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type);
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
> index f78b249e1a41..edb72f4ef82d 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
> @@ -225,6 +225,8 @@ void kfd_free_mqd_cp(struct mqd_manager *mm, void *mqd,
> struct kfd_mem_obj *mqd_mem_obj)
> {
> if (mqd_mem_obj->mem) {
> + amdgpu_gtt_mgr_free_entries(&mm->dev->adev->mman.gtt_mgr,
> + &mqd_mem_obj->mm_node);
> amdgpu_amdkfd_free_kernel_mem(mm->dev->adev, &mqd_mem_obj->mem);
> kfree(mqd_mem_obj);
> } else {
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
> index 14123e1a9716..5828220056bd 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
> @@ -148,6 +148,15 @@ static struct kfd_mem_obj *allocate_mqd(struct kfd_node *node,
> kfree(mqd_mem_obj);
> return NULL;
> }
> +
> + retval = amdgpu_ttm_alloc_gart_vram_bo(mqd_mem_obj->mem,
> + &mqd_mem_obj->mm_node,
> + &(mqd_mem_obj->gpu_addr));
> + if (retval) {
> + amdgpu_amdkfd_free_kernel_mem(node->adev, &(mqd_mem_obj->mem));
> + kfree(mqd_mem_obj);
> + return NULL;
> + }
> } else {
> retval = kfd_gtt_sa_allocate(node, sizeof(struct v9_mqd),
> &mqd_mem_obj);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 29419b3249cf..fdde907836fb 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -252,6 +252,7 @@ struct kfd_mem_obj {
> uint64_t gpu_addr;
> uint32_t *cpu_ptr;
> void *mem;
> + struct drm_mm_node mm_node;
> };
>
> struct kfd_vmid_info {
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v5 5/6] drm/amdgpu: Add helper to alloc GART entries
2025-12-15 15:14 ` Christian König
@ 2025-12-15 15:50 ` Philip Yang
0 siblings, 0 replies; 13+ messages in thread
From: Philip Yang @ 2025-12-15 15:50 UTC (permalink / raw)
To: Christian König, Philip Yang, amd-gfx
Cc: Felix.Kuehling, david.yatsin, pierre-eric.pelloux-prayer,
kent.russell
On 2025-12-15 10:14, Christian König wrote:
> On 12/10/25 00:43, Philip Yang wrote:
>> Add helper amdgpu_gtt_mgr_alloc/free_entries, export the configurable drm_mm
>> allocator parameters to caller.
>>
>> Signed-off-by: Philip Yang <Philip.Yang@amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c | 27 +++++++++++++++++++++
>> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 7 ++++++
>> 2 files changed, 34 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
>> index 895c1e4c6747..d21c7187e4aa 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
>> @@ -321,3 +321,30 @@ void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev)
>> ttm_resource_manager_cleanup(man);
>> ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, NULL);
>> }
>> +
>> +int amdgpu_gtt_mgr_alloc_entries(struct amdgpu_gtt_mgr *mgr,
>> + struct drm_mm_node *node,
>> + u64 num_pages, u64 alignment,
>> + unsigned long color,
>> + enum drm_mm_insert_mode mode)
> The color is unused as far as I remember and the insert mode should be hardcoded, at least I don't see a good reason to expose that.
>
> Any specific reason that was added here?
The color parameter is removed in the next version; Eric pointed out the
same issue. The new alloc color is hardcoded inside alloc_entries.
Thanks,
Philip
>
> Regards,
> Christian.
>
>> +{
>> + struct amdgpu_device *adev = container_of(mgr, typeof(*adev), mman.gtt_mgr);
>> + int r;
>> +
>> + spin_lock(&mgr->lock);
>> + r = drm_mm_insert_node_in_range(&mgr->mm, node, num_pages,
>> + alignment, color, 0,
>> + adev->gmc.gart_size >> PAGE_SHIFT,
>> + mode);
>> + spin_unlock(&mgr->lock);
>> + return r;
>> +}
>> +
>> +void amdgpu_gtt_mgr_free_entries(struct amdgpu_gtt_mgr *mgr,
>> + struct drm_mm_node *mm_node)
>> +{
>> + spin_lock(&mgr->lock);
>> + if (drm_mm_node_allocated(mm_node))
>> + drm_mm_remove_node(mm_node);
>> + spin_unlock(&mgr->lock);
>> +}
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> index 72488124aa59..28511e66d364 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> @@ -141,6 +141,13 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev);
>> bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_resource *mem);
>> void amdgpu_gtt_mgr_recover(struct amdgpu_gtt_mgr *mgr);
>>
>> +int amdgpu_gtt_mgr_alloc_entries(struct amdgpu_gtt_mgr *mgr,
>> + struct drm_mm_node *node,
>> + u64 num_pages, u64 alignment,
>> + unsigned long color,
>> + enum drm_mm_insert_mode mode);
>> +void amdgpu_gtt_mgr_free_entries(struct amdgpu_gtt_mgr *mgr,
>> + struct drm_mm_node *mm_node);
>> uint64_t amdgpu_preempt_mgr_usage(struct ttm_resource_manager *man);
>>
>> u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo);
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v5 6/6] drm/amdkfd: Map VRAM MQD on GART
2025-12-15 15:20 ` Christian König
@ 2025-12-15 16:35 ` Philip Yang
0 siblings, 0 replies; 13+ messages in thread
From: Philip Yang @ 2025-12-15 16:35 UTC (permalink / raw)
To: Christian König, Philip Yang, amd-gfx
Cc: Felix.Kuehling, david.yatsin, pierre-eric.pelloux-prayer,
kent.russell
On 2025-12-15 10:20, Christian König wrote:
> On 12/10/25 00:43, Philip Yang wrote:
>> MQD BO on VRAM access via FB aperture is mtype UC uncaching, map
>> to GART as mtype RW caching, to reduce queue switch latency
>>
>> Add GART mm_node to kfd mem obj to free the GART entries after
>> MQD mem obj is freed.
>>
>> Use resource cursor to handle VRAM resource which maybe on multiple
>> blocks and use cursor_gart to handle GART entries.
>>
>> Signed-off-by: Philip Yang <Philip.Yang@amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 89 +++++++++++++++++++
>> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 4 +-
>> drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c | 2 +
>> .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 9 ++
>> drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 +
>> 5 files changed, 104 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> index 4f8bc7f35cdc..ae4f60aeed14 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> @@ -880,6 +880,62 @@ static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev,
>> }
>> }
>>
>> +static void amdgpu_ttm_gart_bind_gfx9_mqd_vram(struct amdgpu_device *adev,
>> + struct ttm_buffer_object *tbo,
>> + struct drm_mm_node *mm_node,
>> + uint64_t flags)
>> +{
>> + uint64_t total_pages;
>> + int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp);
>> + uint64_t page_idx, pages_per_xcc;
>> + struct amdgpu_res_cursor cursor_gart;
>> + struct amdgpu_res_cursor cursor;
>> + uint64_t ctrl_flags = flags;
>> + int i;
>> +
>> + total_pages = tbo->resource->size >> PAGE_SHIFT;
> Please use tbo->base.size instead.
done
>
> And it would be nicer if the calculation was in bytes and not pages, but not a must have.
The tbo resource and its cursor start are in bytes, while GART entries and
the GART cursor start are in pages; converting drm mm_node to use a
byte-based start would require too many changes.
>
>> +
>> + amdgpu_gmc_get_vm_pte(adev, NULL, NULL, AMDGPU_VM_MTYPE_NC, &ctrl_flags);
>> +
>> + if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 3))
>> + amdgpu_gmc_get_vm_pte(adev, NULL, NULL, AMDGPU_VM_MTYPE_RW, &flags);
>> +
>> + pages_per_xcc = total_pages;
>> + do_div(pages_per_xcc, num_xcc);
>> +
>> + amdgpu_res_first(NULL, mm_node->start, total_pages, &cursor_gart);
>> + amdgpu_res_first(tbo->resource, 0, tbo->resource->size, &cursor);
>> +
>> + for (i = 0, page_idx = 0; i < num_xcc; i++, page_idx += pages_per_xcc) {
>> + u64 start_page;
>> + u64 npages, n;
>> + u64 pa;
>> +
>> + start_page = cursor_gart.start;
>> + pa = cursor.start + adev->vm_manager.vram_base_offset;
>> + n = 1;
>> + amdgpu_gart_map_vram_range(adev, pa, start_page, n,
>> + flags, NULL);
>> +
>> + npages = pages_per_xcc - 1;
>> + while (npages) {
>> + amdgpu_res_next(&cursor_gart, n);
>> + amdgpu_res_next(&cursor, n * PAGE_SIZE);
>> +
>> + start_page = cursor_gart.start;
>> + pa = cursor.start + adev->vm_manager.vram_base_offset;
>> + n = min3(cursor.size / PAGE_SIZE, cursor_gart.size, npages);
>> +
>> + amdgpu_gart_map_vram_range(adev, pa, start_page, n,
>> + ctrl_flags, NULL);
>> +
>> + npages -= n;
>> + }
>> + amdgpu_res_next(&cursor_gart, n);
>> + amdgpu_res_next(&cursor, n * PAGE_SIZE);
>> + }
>> +}
>> +
>> static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
>> struct ttm_buffer_object *tbo,
>> uint64_t flags)
>> @@ -1017,6 +1073,39 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
>> return 0;
>> }
>>
>> +/*
>> + * amdgpu_ttm_alloc_gart_vram_bo - Bind VRAM pages to GART mapping
>> + *
>> + * call amdgpu_ttm_alloc_gart_entries to alloc GART dynamically
>> + */
>> +int amdgpu_ttm_alloc_gart_vram_bo(struct amdgpu_bo *abo,
>> + struct drm_mm_node *mm_node,
>> + u64 *gpu_addr)
>> +{
>> + struct ttm_buffer_object *bo = &abo->tbo;
>> + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
>> + uint64_t flags;
>> + int r;
>> +
>> + /* Only for valid VRAM bo resource */
>> + if (bo->resource->start == AMDGPU_BO_INVALID_OFFSET)
>> + return 0;
> Please drop that check. We really shouldn't touch bo->resource->start any more.
How about this check instead? If the MQD is on GTT for other ASICs, it is
already mapped correctly:
if (amdgpu_mem_type_to_domain(bo->resource->mem_type) !=
AMDGPU_GEM_DOMAIN_VRAM)
return 0;
>
> Apart from that looks reasonable to me, but I'm wondering if GART re-creation after GPU recovery will still work or not.
The color parameter is removed, so the GPU recovery path (GTT recover) is
not affected.
Regards,
Philip
>
> @Pierre-Eric could you double check that?
>
> Regards,
> Christian.
>
>> +
>> + r = amdgpu_gtt_mgr_alloc_entries(&adev->mman.gtt_mgr, mm_node,
>> + amdgpu_bo_ngpu_pages(abo),
>> + 0, 0, 0);
>> + if (r)
>> + return r;
>> +
>> + /* compute PTE flags for this buffer object */
>> + flags = amdgpu_ttm_tt_pte_flags(adev, NULL, bo->resource);
>> + amdgpu_ttm_gart_bind_gfx9_mqd_vram(adev, bo, mm_node, flags);
>> + amdgpu_gart_invalidate_tlb(adev);
>> +
>> + *gpu_addr = mm_node->start << PAGE_SHIFT;
>> + return 0;
>> +}
>> +
>> /*
>> * amdgpu_ttm_recover_gart - Rebind GTT pages
>> *
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> index 28511e66d364..a8b8a541e21b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> @@ -140,7 +140,6 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev);
>>
>> bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_resource *mem);
>> void amdgpu_gtt_mgr_recover(struct amdgpu_gtt_mgr *mgr);
>> int amdgpu_gtt_mgr_alloc_entries(struct amdgpu_gtt_mgr *mgr,
>> struct drm_mm_node *node,
>> u64 num_pages, u64 alignment,
>> @@ -192,6 +191,9 @@ int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity,
>> u64 k_job_id);
>>
>> int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
>> +int amdgpu_ttm_alloc_gart_vram_bo(struct amdgpu_bo *abo,
>> + struct drm_mm_node *mm_node,
>> + u64 *gpu_addr);
>> void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
>> uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type);
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
>> index f78b249e1a41..edb72f4ef82d 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
>> @@ -225,6 +225,8 @@ void kfd_free_mqd_cp(struct mqd_manager *mm, void *mqd,
>> struct kfd_mem_obj *mqd_mem_obj)
>> {
>> if (mqd_mem_obj->mem) {
>> + amdgpu_gtt_mgr_free_entries(&mm->dev->adev->mman.gtt_mgr,
>> + &mqd_mem_obj->mm_node);
>> amdgpu_amdkfd_free_kernel_mem(mm->dev->adev, &mqd_mem_obj->mem);
>> kfree(mqd_mem_obj);
>> } else {
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
>> index 14123e1a9716..5828220056bd 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
>> @@ -148,6 +148,15 @@ static struct kfd_mem_obj *allocate_mqd(struct kfd_node *node,
>> kfree(mqd_mem_obj);
>> return NULL;
>> }
>> +
>> + retval = amdgpu_ttm_alloc_gart_vram_bo(mqd_mem_obj->mem,
>> + &mqd_mem_obj->mm_node,
>> + &(mqd_mem_obj->gpu_addr));
>> + if (retval) {
>> + amdgpu_amdkfd_free_kernel_mem(node->adev, &(mqd_mem_obj->mem));
>> + kfree(mqd_mem_obj);
>> + return NULL;
>> + }
>> } else {
>> retval = kfd_gtt_sa_allocate(node, sizeof(struct v9_mqd),
>> &mqd_mem_obj);
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> index 29419b3249cf..fdde907836fb 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> @@ -252,6 +252,7 @@ struct kfd_mem_obj {
>> uint64_t gpu_addr;
>> uint32_t *cpu_ptr;
>> void *mem;
>> + struct drm_mm_node mm_node;
>> };
>>
>> struct kfd_vmid_info {
^ permalink raw reply [flat|nested] 13+ messages in thread
end of thread, other threads:[~2025-12-15 16:36 UTC | newest]
Thread overview: 13+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-12-09 23:43 [PATCH v5 0/6] drm/amdkfd: Move gfx9 MQD to HBM Philip Yang
2025-12-09 23:43 ` [PATCH v5 1/6] drm/amdgpu: Fix gfx9 update PTE mtype flag Philip Yang
2025-12-09 23:43 ` [PATCH v5 2/6] drm/amdkfd: Bind MQD in GART with mtype RW Philip Yang
2025-12-09 23:43 ` [PATCH v5 3/6] drm/amdkfd: Add domain parameter to alloc kernel BO Philip Yang
2025-12-09 23:43 ` [PATCH v5 4/6] drm/amdkfd: Move gfx9 MQD to VRAM domain Philip Yang
2025-12-09 23:43 ` [PATCH v5 5/6] drm/amdgpu: Add helper to alloc GART entries Philip Yang
2025-12-10 12:57 ` Pierre-Eric Pelloux-Prayer
2025-12-10 14:05 ` Philip Yang
2025-12-15 15:14 ` Christian König
2025-12-15 15:50 ` Philip Yang
2025-12-09 23:43 ` [PATCH v5 6/6] drm/amdkfd: Map VRAM MQD on GART Philip Yang
2025-12-15 15:20 ` Christian König
2025-12-15 16:35 ` Philip Yang
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox