[PATCH] drm/amdkfd: fix vm-pasid lookup for multiple partitions

AMD-GFX Archive on lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH] drm/amdkfd: fix vm-pasid lookup for multiple partitions
@ 2024-08-19 17:59 Jonathan Kim
  2024-09-05 14:23 ` Christian König
  0 siblings, 1 reply; 11+ messages in thread
From: Jonathan Kim @ 2024-08-19 17:59 UTC (permalink / raw)
  To: amd-gfx
  Cc: Felix.Kuehling, Alexander.Deucher, Mukul.Joshi, Jonathan Kim,
	Jonathan Kim

Currently multiple partitions will incorrectly overwrite the VM lookup
table since the table is indexed by PASID and multiple partitions can
register different VM objects on the same PASID.

This results in loading the wrong VM object on PASID query.

To correct this, set up the lookup table to be per-partition-per-PASID
instead.

Signed-off-by: Jonathan Kim <jonathan.kim@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c    | 12 ++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c       |  4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c       |  7 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        | 55 +++++++++++--------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h        | 11 +++-
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c        |  5 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c        |  5 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c        |  5 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c         |  3 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c         |  5 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c         | 16 ++----
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c        |  2 +-
 drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c      |  4 +-
 drivers/gpu/drm/amd/amdkfd/kfd_events.c       |  3 +-
 .../gpu/drm/amd/amdkfd/kfd_int_process_v10.c  |  8 +--
 .../gpu/drm/amd/amdkfd/kfd_int_process_v9.c   |  8 +--
 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c   |  3 +-
 18 files changed, 92 insertions(+), 65 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index c272461d70a9..28db789610e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -887,3 +887,15 @@ int amdgpu_amdkfd_unmap_hiq(struct amdgpu_device *adev, u32 doorbell_off,
 
 	return r;
 }
+
+int amdgpu_amdkfd_node_id_to_xcc_id(struct amdgpu_device *adev, uint32_t node_id)
+{
+	if (adev->gfx.funcs->ih_node_to_logical_xcc) {
+		int xcc_id = adev->gfx.funcs->ih_node_to_logical_xcc(adev, node_id);
+
+		if (xcc_id >= 0)
+			return xcc_id;
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 4ed49265c764..bf8bb45d8ab6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -356,6 +356,7 @@ void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
 		uint64_t size, u32 alloc_flag, int8_t xcp_id);
 
 u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device *adev, int xcp_id);
+int amdgpu_amdkfd_node_id_to_xcc_id(struct amdgpu_device *adev, uint32_t node_id);
 
 #define KFD_XCP_MEM_ID(adev, xcp_id) \
 		((adev)->xcp_mgr && (xcp_id) >= 0 ?\
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index c6a1783fc9ef..bf9f8802e18d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -37,7 +37,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
 	struct amdgpu_job *job = to_amdgpu_job(s_job);
 	struct amdgpu_task_info *ti;
 	struct amdgpu_device *adev = ring->adev;
-	int idx;
+	int idx, xcp_id = !job->vm ? 0 : job->vm->xcp_id;
 	int r;
 
 	if (!drm_dev_enter(adev_to_drm(adev), &idx)) {
@@ -62,7 +62,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
 		job->base.sched->name, atomic_read(&ring->fence_drv.last_seq),
 		ring->fence_drv.sync_seq);
 
-	ti = amdgpu_vm_get_task_info_pasid(ring->adev, job->pasid);
+	ti = amdgpu_vm_get_task_info_pasid(ring->adev, job->pasid, xcp_id);
 	if (ti) {
 		dev_err(adev->dev,
 			"Process information: process %s pid %d thread %s pid %d\n",
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index d9fde38f6ee2..e413bf4a3e84 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -1275,17 +1275,20 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 		struct amdgpu_vm *vm = &fpriv->vm;
 		struct drm_amdgpu_info_gpuvm_fault gpuvm_fault;
 		unsigned long flags;
+		int i;
 
 		if (!vm)
 			return -EINVAL;
 
 		memset(&gpuvm_fault, 0, sizeof(gpuvm_fault));
 
-		xa_lock_irqsave(&adev->vm_manager.pasids, flags);
+		for (i = 0; i < adev->xcp_mgr->num_xcps; i++)
+			xa_lock_irqsave(&adev->vm_manager.pasids[i], flags);
 		gpuvm_fault.addr = vm->fault_info.addr;
 		gpuvm_fault.status = vm->fault_info.status;
 		gpuvm_fault.vmhub = vm->fault_info.vmhub;
-		xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
+		for (i = 0; i < adev->xcp_mgr->num_xcps; i++)
+			xa_unlock_irqrestore(&adev->vm_manager.pasids[i], flags);
 
 		return copy_to_user(out, &gpuvm_fault,
 				    min((size_t)size, sizeof(gpuvm_fault))) ? -EFAULT : 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index bcb729094521..f43e1c15f423 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -146,7 +146,7 @@ int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		return 0;
 
 	if (vm->pasid) {
-		r = xa_err(xa_erase_irq(&adev->vm_manager.pasids, vm->pasid));
+		r = xa_err(xa_erase_irq(&adev->vm_manager.pasids[vm->xcp_id], vm->pasid));
 		if (r < 0)
 			return r;
 
@@ -154,7 +154,7 @@ int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	}
 
 	if (pasid) {
-		r = xa_err(xa_store_irq(&adev->vm_manager.pasids, pasid, vm,
+		r = xa_err(xa_store_irq(&adev->vm_manager.pasids[vm->xcp_id], pasid, vm,
 					GFP_KERNEL));
 		if (r < 0)
 			return r;
@@ -2288,14 +2288,14 @@ static void amdgpu_vm_destroy_task_info(struct kref *kref)
 }
 
 static inline struct amdgpu_vm *
-amdgpu_vm_get_vm_from_pasid(struct amdgpu_device *adev, u32 pasid)
+amdgpu_vm_get_vm_from_pasid(struct amdgpu_device *adev, u32 pasid, u32 xcp_id)
 {
 	struct amdgpu_vm *vm;
 	unsigned long flags;
 
-	xa_lock_irqsave(&adev->vm_manager.pasids, flags);
-	vm = xa_load(&adev->vm_manager.pasids, pasid);
-	xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
+	xa_lock_irqsave(&adev->vm_manager.pasids[xcp_id], flags);
+	vm = xa_load(&adev->vm_manager.pasids[xcp_id], pasid);
+	xa_unlock_irqrestore(&adev->vm_manager.pasids[xcp_id], flags);
 
 	return vm;
 }
@@ -2343,10 +2343,10 @@ amdgpu_vm_get_task_info_vm(struct amdgpu_vm *vm)
  * referenced down with amdgpu_vm_put_task_info.
  */
 struct amdgpu_task_info *
-amdgpu_vm_get_task_info_pasid(struct amdgpu_device *adev, u32 pasid)
+amdgpu_vm_get_task_info_pasid(struct amdgpu_device *adev, u32 pasid, u32 xcp_id)
 {
 	return amdgpu_vm_get_task_info_vm(
-			amdgpu_vm_get_vm_from_pasid(adev, pasid));
+			amdgpu_vm_get_vm_from_pasid(adev, pasid, xcp_id));
 }
 
 static int amdgpu_vm_create_task_info(struct amdgpu_vm *vm)
@@ -2481,6 +2481,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	amdgpu_bo_unreserve(vm->root.bo);
 	amdgpu_bo_unref(&root_bo);
 
+	vm->xcp_id = xcp_id < 0 ? 0 : xcp_id;
+
 	return 0;
 
 error_free_root:
@@ -2695,8 +2697,8 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
 #else
 	adev->vm_manager.vm_update_mode = 0;
 #endif
-
-	xa_init_flags(&adev->vm_manager.pasids, XA_FLAGS_LOCK_IRQ);
+	for (i = 0; i < MAX_XCP; i++)
+		xa_init_flags(&(adev->vm_manager.pasids[i]), XA_FLAGS_LOCK_IRQ);
 }
 
 /**
@@ -2708,10 +2710,15 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
  */
 void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
 {
-	WARN_ON(!xa_empty(&adev->vm_manager.pasids));
-	xa_destroy(&adev->vm_manager.pasids);
+	int i;
+
+	for (i = 0; i < MAX_XCP; i++) {
+		WARN_ON(!xa_empty(&adev->vm_manager.pasids[i]));
+		xa_destroy(&adev->vm_manager.pasids[i]);
+	}
 
 	amdgpu_vmid_mgr_fini(adev);
+
 }
 
 /**
@@ -2778,17 +2785,18 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
 	unsigned long irqflags;
 	uint64_t value, flags;
 	struct amdgpu_vm *vm;
-	int r;
+	int r, xcp_id;
 
-	xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
-	vm = xa_load(&adev->vm_manager.pasids, pasid);
+	xcp_id = amdgpu_amdkfd_node_id_to_xcc_id(adev, node_id)/adev->gfx.num_xcc_per_xcp;
+	xa_lock_irqsave(&adev->vm_manager.pasids[xcp_id], irqflags);
+	vm = xa_load(&adev->vm_manager.pasids[xcp_id], pasid);
 	if (vm) {
 		root = amdgpu_bo_ref(vm->root.bo);
 		is_compute_context = vm->is_compute_context;
 	} else {
 		root = NULL;
 	}
-	xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
+	xa_unlock_irqrestore(&adev->vm_manager.pasids[xcp_id], irqflags);
 
 	if (!root)
 		return false;
@@ -2806,11 +2814,11 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
 		goto error_unref;
 
 	/* Double check that the VM still exists */
-	xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
-	vm = xa_load(&adev->vm_manager.pasids, pasid);
+	xa_lock_irqsave(&adev->vm_manager.pasids[xcp_id], irqflags);
+	vm = xa_load(&adev->vm_manager.pasids[xcp_id], pasid);
 	if (vm && vm->root.bo != root)
 		vm = NULL;
-	xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
+	xa_unlock_irqrestore(&adev->vm_manager.pasids[xcp_id], irqflags);
 	if (!vm)
 		goto error_unlock;
 
@@ -2968,14 +2976,15 @@ void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev,
 				  unsigned int pasid,
 				  uint64_t addr,
 				  uint32_t status,
-				  unsigned int vmhub)
+				  unsigned int vmhub,
+				  uint32_t xcp_id)
 {
 	struct amdgpu_vm *vm;
 	unsigned long flags;
 
-	xa_lock_irqsave(&adev->vm_manager.pasids, flags);
+	xa_lock_irqsave(&adev->vm_manager.pasids[xcp_id], flags);
 
-	vm = xa_load(&adev->vm_manager.pasids, pasid);
+	vm = xa_load(&adev->vm_manager.pasids[xcp_id], pasid);
 	/* Don't update the fault cache if status is 0.  In the multiple
 	 * fault case, subsequent faults will return a 0 status which is
 	 * useless for userspace and replaces the useful fault status, so
@@ -3008,7 +3017,7 @@ void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev,
 			WARN_ONCE(1, "Invalid vmhub %u\n", vmhub);
 		}
 	}
-	xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
+	xa_unlock_irqrestore(&adev->vm_manager.pasids[xcp_id], flags);
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 046949c4b695..1499f5f731e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -35,6 +35,7 @@
 #include "amdgpu_sync.h"
 #include "amdgpu_ring.h"
 #include "amdgpu_ids.h"
+#include "amdgpu_xcp.h"
 
 struct drm_exec;
 
@@ -418,6 +419,9 @@ struct amdgpu_vm {
 
 	/* cached fault info */
 	struct amdgpu_vm_fault_info fault_info;
+
+	/* XCP ID */
+	int xcp_id;
 };
 
 struct amdgpu_vm_manager {
@@ -456,7 +460,7 @@ struct amdgpu_vm_manager {
 	/* PASID to VM mapping, will be used in interrupt context to
 	 * look up VM of a page fault
 	 */
-	struct xarray				pasids;
+	struct xarray				pasids[MAX_XCP];
 	/* Global registration of recent page fault information */
 	struct amdgpu_vm_fault_info	fault_info;
 };
@@ -550,7 +554,7 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
 void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev);
 
 struct amdgpu_task_info *
-amdgpu_vm_get_task_info_pasid(struct amdgpu_device *adev, u32 pasid);
+amdgpu_vm_get_task_info_pasid(struct amdgpu_device *adev, u32 pasid, u32 xcp_id);
 
 struct amdgpu_task_info *
 amdgpu_vm_get_task_info_vm(struct amdgpu_vm *vm);
@@ -649,7 +653,8 @@ void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev,
 				  unsigned int pasid,
 				  uint64_t addr,
 				  uint32_t status,
-				  unsigned int vmhub);
+				  unsigned int vmhub,
+				  uint32_t xcp_id);
 void amdgpu_vm_tlb_fence_create(struct amdgpu_device *adev,
 				 struct amdgpu_vm *vm,
 				 struct dma_fence **fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index f0ceab3ce5bf..24b042febf5c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -151,7 +151,8 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
 		WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
 
 		amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status,
-					     entry->vmid_src ? AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0));
+					     entry->vmid_src ? AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0),
+					     0);
 	}
 
 	if (!printk_ratelimit())
@@ -161,7 +162,7 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
 		"[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n",
 		entry->vmid_src ? "mmhub" : "gfxhub",
 		entry->src_id, entry->ring_id, entry->vmid, entry->pasid);
-	task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
+	task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid, 0);
 	if (task_info) {
 		dev_err(adev->dev,
 			" in process %s pid %d thread %s pid %d\n",
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index 2797fd84432b..3507046d33e6 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -122,7 +122,8 @@ static int gmc_v11_0_process_interrupt(struct amdgpu_device *adev,
 		WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
 
 		amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status,
-					     entry->vmid_src ? AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0));
+					     entry->vmid_src ? AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0),
+					     0);
 	}
 
 	if (printk_ratelimit()) {
@@ -132,7 +133,7 @@ static int gmc_v11_0_process_interrupt(struct amdgpu_device *adev,
 			"[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n",
 			entry->vmid_src ? "mmhub" : "gfxhub",
 			entry->src_id, entry->ring_id, entry->vmid, entry->pasid);
-		task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
+		task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid, 0);
 		if (task_info) {
 			dev_err(adev->dev,
 				" in process %s pid %d thread %s pid %d)\n",
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
index 60acf676000b..9844564c6c74 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
@@ -115,7 +115,8 @@ static int gmc_v12_0_process_interrupt(struct amdgpu_device *adev,
 		WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
 
 		amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status,
-					     entry->vmid_src ? AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0));
+					     entry->vmid_src ? AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0),
+					     0);
 	}
 
 	if (printk_ratelimit()) {
@@ -125,7 +126,7 @@ static int gmc_v12_0_process_interrupt(struct amdgpu_device *adev,
 			"[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n",
 			entry->vmid_src ? "mmhub" : "gfxhub",
 			entry->src_id, entry->ring_id, entry->vmid, entry->pasid);
-		task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
+		task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid, 0);
 		if (task_info) {
 			dev_err(adev->dev,
 				" in process %s pid %d thread %s pid %d)\n",
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 994432fb57ea..2cdb0cbb7c4d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -1268,7 +1268,8 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
 		return 0;
 
 	amdgpu_vm_update_fault_cache(adev, entry->pasid,
-				     ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT, status, AMDGPU_GFXHUB(0));
+				     ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT, status,
+				     AMDGPU_GFXHUB(0), 0);
 
 	if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_FIRST)
 		gmc_v7_0_set_fault_enable_default(adev, false);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 86488c052f82..6855caeb7f74 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -1437,7 +1437,8 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
 		return 0;
 
 	amdgpu_vm_update_fault_cache(adev, entry->pasid,
-				     ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT, status, AMDGPU_GFXHUB(0));
+				     ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT, status,
+				     AMDGPU_GFXHUB(0), 0);
 
 	if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_FIRST)
 		gmc_v8_0_set_fault_enable_default(adev, false);
@@ -1448,7 +1449,7 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
 		dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n",
 			entry->src_id, entry->src_data[0]);
 
-		task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
+		task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid, 0);
 		if (task_info) {
 			dev_err(adev->dev, " for process %s pid %d thread %s pid %d\n",
 				task_info->process_name, task_info->tgid,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index b73136d390cc..e183e08b2c02 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -556,10 +556,12 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
 	unsigned int vmhub;
 	u64 addr;
 	uint32_t cam_index = 0;
-	int ret, xcc_id = 0;
-	uint32_t node_id;
+	int ret;
+	uint32_t node_id, xcc_id, xcp_id;
 
 	node_id = entry->node_id;
+	xcc_id = amdgpu_amdkfd_node_id_to_xcc_id(adev, node_id);
+	xcp_id = xcc_id/adev->gfx.num_xcc_per_xcp;
 
 	addr = (u64)entry->src_data[0] << 12;
 	addr |= ((u64)entry->src_data[1] & 0xf) << 44;
@@ -572,12 +574,6 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
 		vmhub = AMDGPU_MMHUB1(0);
 	} else {
 		hub_name = "gfxhub0";
-		if (adev->gfx.funcs->ih_node_to_logical_xcc) {
-			xcc_id = adev->gfx.funcs->ih_node_to_logical_xcc(adev,
-				node_id);
-			if (xcc_id < 0)
-				xcc_id = 0;
-		}
 		vmhub = xcc_id;
 	}
 	hub = &adev->vmhub[vmhub];
@@ -631,7 +627,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
 		retry_fault ? "retry" : "no-retry",
 		entry->src_id, entry->ring_id, entry->vmid, entry->pasid);
 
-	task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
+	task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid, xcp_id);
 	if (task_info) {
 		dev_err(adev->dev,
 			" for process %s pid %d thread %s pid %d)\n",
@@ -675,7 +671,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
 	if (!amdgpu_sriov_vf(adev))
 		WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
 
-	amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status, vmhub);
+	amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status, vmhub, xcp_id);
 
 	dev_err(adev->dev,
 		"VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 23ef4eb36b40..1ac4224bbe5b 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -2182,7 +2182,7 @@ static int sdma_v4_0_print_iv_entry(struct amdgpu_device *adev,
 			   instance, addr, entry->src_id, entry->ring_id, entry->vmid,
 			   entry->pasid);
 
-	task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
+	task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid, 0);
 	if (task_info) {
 		dev_dbg_ratelimited(adev->dev,
 				    " for process %s pid %d thread %s pid %d\n",
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
index 57f16c09abfc..c8b5c0302ca7 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -1683,6 +1683,8 @@ static int sdma_v4_4_2_print_iv_entry(struct amdgpu_device *adev,
 	int instance;
 	struct amdgpu_task_info *task_info;
 	u64 addr;
+	uint32_t xcc_id = amdgpu_amdkfd_node_id_to_xcc_id(adev, entry->node_id);
+	uint32_t xcp_id = xcc_id/adev->gfx.num_xcc_per_xcp;
 
 	instance = sdma_v4_4_2_irq_id_to_seq(adev, entry->client_id);
 	if (instance < 0 || instance >= adev->sdma.num_instances) {
@@ -1698,7 +1700,7 @@ static int sdma_v4_4_2_print_iv_entry(struct amdgpu_device *adev,
 			    instance, addr, entry->src_id, entry->ring_id, entry->vmid,
 			    entry->pasid);
 
-	task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
+	task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid, xcp_id);
 	if (task_info) {
 		dev_dbg_ratelimited(adev->dev, " for process %s pid %d thread %s pid %d\n",
 				    task_info->process_name, task_info->tgid,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index ea3792249209..c098fbaf0e1c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -1262,8 +1262,9 @@ void kfd_signal_reset_event(struct kfd_node *dev)
 
 		if (dev->dqm->detect_hang_count) {
 			struct amdgpu_task_info *ti;
+			uint32_t xcp_id = dev->xcp ? dev->xcp->id : 0;
 
-			ti = amdgpu_vm_get_task_info_pasid(dev->adev, p->pasid);
+			ti = amdgpu_vm_get_task_info_pasid(dev->adev, p->pasid, xcp_id);
 			if (ti) {
 				dev_err(dev->adev->dev,
 					"Queues reset on process %s tid %d thread %s pid %d\n",
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
index 8e0d0356e810..d7cbf9525698 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
@@ -377,12 +377,8 @@ static void event_interrupt_wq_v10(struct kfd_node *dev,
 		struct kfd_hsa_memory_exception_data exception_data;
 
 		/* gfxhub */
-		if (!vmid_type && dev->adev->gfx.funcs->ih_node_to_logical_xcc) {
-			hub_inst = dev->adev->gfx.funcs->ih_node_to_logical_xcc(dev->adev,
-				node_id);
-			if (hub_inst < 0)
-				hub_inst = 0;
-		}
+		if (!vmid_type)
+			hub_inst = amdgpu_amdkfd_node_id_to_xcc_id(dev->adev, node_id);
 
 		/* mmhub */
 		if (vmid_type && client_id == SOC15_IH_CLIENTID_VMC)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
index a9c3580be8c9..4708b8c811a5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
@@ -437,12 +437,8 @@ static void event_interrupt_wq_v9(struct kfd_node *dev,
 		struct kfd_hsa_memory_exception_data exception_data;
 
 		/* gfxhub */
-		if (!vmid_type && dev->adev->gfx.funcs->ih_node_to_logical_xcc) {
-			hub_inst = dev->adev->gfx.funcs->ih_node_to_logical_xcc(dev->adev,
-				node_id);
-			if (hub_inst < 0)
-				hub_inst = 0;
-		}
+		if (!vmid_type)
+			hub_inst = amdgpu_amdkfd_node_id_to_xcc_id(dev->adev, node_id);
 
 		/* mmhub */
 		if (vmid_type && client_id == SOC15_IH_CLIENTID_VMC)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
index ea6a8e43bd5b..b5f2f5b1069c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
@@ -251,8 +251,9 @@ void kfd_smi_event_update_thermal_throttling(struct kfd_node *dev,
 void kfd_smi_event_update_vmfault(struct kfd_node *dev, uint16_t pasid)
 {
 	struct amdgpu_task_info *task_info;
+	uint32_t xcp_id = dev->xcp ? dev->xcp->id : 0;
 
-	task_info = amdgpu_vm_get_task_info_pasid(dev->adev, pasid);
+	task_info = amdgpu_vm_get_task_info_pasid(dev->adev, pasid, xcp_id);
 	if (task_info) {
 		/* Report VM faults from user applications, not retry from kernel */
 		if (task_info->pid)
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH] drm/amdkfd: fix vm-pasid lookup for multiple partitions
  2024-08-19 17:59 [PATCH] drm/amdkfd: fix vm-pasid lookup for multiple partitions Jonathan Kim
@ 2024-09-05 14:23 ` Christian König
  2024-09-09 16:02   ` Kim, Jonathan
  0 siblings, 1 reply; 11+ messages in thread
From: Christian König @ 2024-09-05 14:23 UTC (permalink / raw)
  To: Jonathan Kim, amd-gfx; +Cc: Felix.Kuehling, Alexander.Deucher, Mukul.Joshi

Am 19.08.24 um 19:59 schrieb Jonathan Kim:
> Currently multiple partitions will incorrectly overwrite the VM lookup
> table since the table is indexed by PASID and multiple partitions can
> register different VM objects on the same PASID.

That's a rather bad idea. Why do we have the same PASID for different VM 
objects in the first place?

Regards,
Christian.

>
> This results in loading the wrong VM object on PASID query.
>
> To correct this, set up the lookup table to be per-partition-per-PASID
> instead.
>
> Signed-off-by: Jonathan Kim <jonathan.kim@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c    | 12 ++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |  1 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_job.c       |  4 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c       |  7 ++-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        | 55 +++++++++++--------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h        | 11 +++-
>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c        |  5 +-
>   drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c        |  5 +-
>   drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c        |  5 +-
>   drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c         |  3 +-
>   drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c         |  5 +-
>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c         | 16 ++----
>   drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c        |  2 +-
>   drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c      |  4 +-
>   drivers/gpu/drm/amd/amdkfd/kfd_events.c       |  3 +-
>   .../gpu/drm/amd/amdkfd/kfd_int_process_v10.c  |  8 +--
>   .../gpu/drm/amd/amdkfd/kfd_int_process_v9.c   |  8 +--
>   drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c   |  3 +-
>   18 files changed, 92 insertions(+), 65 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> index c272461d70a9..28db789610e1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> @@ -887,3 +887,15 @@ int amdgpu_amdkfd_unmap_hiq(struct amdgpu_device *adev, u32 doorbell_off,
>   
>   	return r;
>   }
> +
> +int amdgpu_amdkfd_node_id_to_xcc_id(struct amdgpu_device *adev, uint32_t node_id)
> +{
> +	if (adev->gfx.funcs->ih_node_to_logical_xcc) {
> +		int xcc_id = adev->gfx.funcs->ih_node_to_logical_xcc(adev, node_id);
> +
> +		if (xcc_id >= 0)
> +			return xcc_id;
> +	}
> +
> +	return 0;
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index 4ed49265c764..bf8bb45d8ab6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -356,6 +356,7 @@ void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
>   		uint64_t size, u32 alloc_flag, int8_t xcp_id);
>   
>   u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device *adev, int xcp_id);
> +int amdgpu_amdkfd_node_id_to_xcc_id(struct amdgpu_device *adev, uint32_t node_id);
>   
>   #define KFD_XCP_MEM_ID(adev, xcp_id) \
>   		((adev)->xcp_mgr && (xcp_id) >= 0 ?\
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> index c6a1783fc9ef..bf9f8802e18d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> @@ -37,7 +37,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
>   	struct amdgpu_job *job = to_amdgpu_job(s_job);
>   	struct amdgpu_task_info *ti;
>   	struct amdgpu_device *adev = ring->adev;
> -	int idx;
> +	int idx, xcp_id = !job->vm ? 0 : job->vm->xcp_id;
>   	int r;
>   
>   	if (!drm_dev_enter(adev_to_drm(adev), &idx)) {
> @@ -62,7 +62,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
>   		job->base.sched->name, atomic_read(&ring->fence_drv.last_seq),
>   		ring->fence_drv.sync_seq);
>   
> -	ti = amdgpu_vm_get_task_info_pasid(ring->adev, job->pasid);
> +	ti = amdgpu_vm_get_task_info_pasid(ring->adev, job->pasid, xcp_id);
>   	if (ti) {
>   		dev_err(adev->dev,
>   			"Process information: process %s pid %d thread %s pid %d\n",
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> index d9fde38f6ee2..e413bf4a3e84 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> @@ -1275,17 +1275,20 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>   		struct amdgpu_vm *vm = &fpriv->vm;
>   		struct drm_amdgpu_info_gpuvm_fault gpuvm_fault;
>   		unsigned long flags;
> +		int i;
>   
>   		if (!vm)
>   			return -EINVAL;
>   
>   		memset(&gpuvm_fault, 0, sizeof(gpuvm_fault));
>   
> -		xa_lock_irqsave(&adev->vm_manager.pasids, flags);
> +		for (i = 0; i < adev->xcp_mgr->num_xcps; i++)
> +			xa_lock_irqsave(&adev->vm_manager.pasids[i], flags);
>   		gpuvm_fault.addr = vm->fault_info.addr;
>   		gpuvm_fault.status = vm->fault_info.status;
>   		gpuvm_fault.vmhub = vm->fault_info.vmhub;
> -		xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
> +		for (i = 0; i < adev->xcp_mgr->num_xcps; i++)
> +			xa_unlock_irqrestore(&adev->vm_manager.pasids[i], flags);
>   
>   		return copy_to_user(out, &gpuvm_fault,
>   				    min((size_t)size, sizeof(gpuvm_fault))) ? -EFAULT : 0;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index bcb729094521..f43e1c15f423 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -146,7 +146,7 @@ int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   		return 0;
>   
>   	if (vm->pasid) {
> -		r = xa_err(xa_erase_irq(&adev->vm_manager.pasids, vm->pasid));
> +		r = xa_err(xa_erase_irq(&adev->vm_manager.pasids[vm->xcp_id], vm->pasid));
>   		if (r < 0)
>   			return r;
>   
> @@ -154,7 +154,7 @@ int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   	}
>   
>   	if (pasid) {
> -		r = xa_err(xa_store_irq(&adev->vm_manager.pasids, pasid, vm,
> +		r = xa_err(xa_store_irq(&adev->vm_manager.pasids[vm->xcp_id], pasid, vm,
>   					GFP_KERNEL));
>   		if (r < 0)
>   			return r;
> @@ -2288,14 +2288,14 @@ static void amdgpu_vm_destroy_task_info(struct kref *kref)
>   }
>   
>   static inline struct amdgpu_vm *
> -amdgpu_vm_get_vm_from_pasid(struct amdgpu_device *adev, u32 pasid)
> +amdgpu_vm_get_vm_from_pasid(struct amdgpu_device *adev, u32 pasid, u32 xcp_id)
>   {
>   	struct amdgpu_vm *vm;
>   	unsigned long flags;
>   
> -	xa_lock_irqsave(&adev->vm_manager.pasids, flags);
> -	vm = xa_load(&adev->vm_manager.pasids, pasid);
> -	xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
> +	xa_lock_irqsave(&adev->vm_manager.pasids[xcp_id], flags);
> +	vm = xa_load(&adev->vm_manager.pasids[xcp_id], pasid);
> +	xa_unlock_irqrestore(&adev->vm_manager.pasids[xcp_id], flags);
>   
>   	return vm;
>   }
> @@ -2343,10 +2343,10 @@ amdgpu_vm_get_task_info_vm(struct amdgpu_vm *vm)
>    * referenced down with amdgpu_vm_put_task_info.
>    */
>   struct amdgpu_task_info *
> -amdgpu_vm_get_task_info_pasid(struct amdgpu_device *adev, u32 pasid)
> +amdgpu_vm_get_task_info_pasid(struct amdgpu_device *adev, u32 pasid, u32 xcp_id)
>   {
>   	return amdgpu_vm_get_task_info_vm(
> -			amdgpu_vm_get_vm_from_pasid(adev, pasid));
> +			amdgpu_vm_get_vm_from_pasid(adev, pasid, xcp_id));
>   }
>   
>   static int amdgpu_vm_create_task_info(struct amdgpu_vm *vm)
> @@ -2481,6 +2481,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   	amdgpu_bo_unreserve(vm->root.bo);
>   	amdgpu_bo_unref(&root_bo);
>   
> +	vm->xcp_id = xcp_id < 0 ? 0 : xcp_id;
> +
>   	return 0;
>   
>   error_free_root:
> @@ -2695,8 +2697,8 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
>   #else
>   	adev->vm_manager.vm_update_mode = 0;
>   #endif
> -
> -	xa_init_flags(&adev->vm_manager.pasids, XA_FLAGS_LOCK_IRQ);
> +	for (i = 0; i < MAX_XCP; i++)
> +		xa_init_flags(&(adev->vm_manager.pasids[i]), XA_FLAGS_LOCK_IRQ);
>   }
>   
>   /**
> @@ -2708,10 +2710,15 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
>    */
>   void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
>   {
> -	WARN_ON(!xa_empty(&adev->vm_manager.pasids));
> -	xa_destroy(&adev->vm_manager.pasids);
> +	int i;
> +
> +	for (i = 0; i < MAX_XCP; i++) {
> +		WARN_ON(!xa_empty(&adev->vm_manager.pasids[i]));
> +		xa_destroy(&adev->vm_manager.pasids[i]);
> +	}
>   
>   	amdgpu_vmid_mgr_fini(adev);
> +
>   }
>   
>   /**
> @@ -2778,17 +2785,18 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
>   	unsigned long irqflags;
>   	uint64_t value, flags;
>   	struct amdgpu_vm *vm;
> -	int r;
> +	int r, xcp_id;
>   
> -	xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
> -	vm = xa_load(&adev->vm_manager.pasids, pasid);
> +	xcp_id = amdgpu_amdkfd_node_id_to_xcc_id(adev, node_id)/adev->gfx.num_xcc_per_xcp;
> +	xa_lock_irqsave(&adev->vm_manager.pasids[xcp_id], irqflags);
> +	vm = xa_load(&adev->vm_manager.pasids[xcp_id], pasid);
>   	if (vm) {
>   		root = amdgpu_bo_ref(vm->root.bo);
>   		is_compute_context = vm->is_compute_context;
>   	} else {
>   		root = NULL;
>   	}
> -	xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
> +	xa_unlock_irqrestore(&adev->vm_manager.pasids[xcp_id], irqflags);
>   
>   	if (!root)
>   		return false;
> @@ -2806,11 +2814,11 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
>   		goto error_unref;
>   
>   	/* Double check that the VM still exists */
> -	xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
> -	vm = xa_load(&adev->vm_manager.pasids, pasid);
> +	xa_lock_irqsave(&adev->vm_manager.pasids[xcp_id], irqflags);
> +	vm = xa_load(&adev->vm_manager.pasids[xcp_id], pasid);
>   	if (vm && vm->root.bo != root)
>   		vm = NULL;
> -	xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
> +	xa_unlock_irqrestore(&adev->vm_manager.pasids[xcp_id], irqflags);
>   	if (!vm)
>   		goto error_unlock;
>   
> @@ -2968,14 +2976,15 @@ void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev,
>   				  unsigned int pasid,
>   				  uint64_t addr,
>   				  uint32_t status,
> -				  unsigned int vmhub)
> +				  unsigned int vmhub,
> +				  uint32_t xcp_id)
>   {
>   	struct amdgpu_vm *vm;
>   	unsigned long flags;
>   
> -	xa_lock_irqsave(&adev->vm_manager.pasids, flags);
> +	xa_lock_irqsave(&adev->vm_manager.pasids[xcp_id], flags);
>   
> -	vm = xa_load(&adev->vm_manager.pasids, pasid);
> +	vm = xa_load(&adev->vm_manager.pasids[xcp_id], pasid);
>   	/* Don't update the fault cache if status is 0.  In the multiple
>   	 * fault case, subsequent faults will return a 0 status which is
>   	 * useless for userspace and replaces the useful fault status, so
> @@ -3008,7 +3017,7 @@ void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev,
>   			WARN_ONCE(1, "Invalid vmhub %u\n", vmhub);
>   		}
>   	}
> -	xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
> +	xa_unlock_irqrestore(&adev->vm_manager.pasids[xcp_id], flags);
>   }
>   
>   /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 046949c4b695..1499f5f731e9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -35,6 +35,7 @@
>   #include "amdgpu_sync.h"
>   #include "amdgpu_ring.h"
>   #include "amdgpu_ids.h"
> +#include "amdgpu_xcp.h"
>   
>   struct drm_exec;
>   
> @@ -418,6 +419,9 @@ struct amdgpu_vm {
>   
>   	/* cached fault info */
>   	struct amdgpu_vm_fault_info fault_info;
> +
> +	/* XCP ID */
> +	int xcp_id;
>   };
>   
>   struct amdgpu_vm_manager {
> @@ -456,7 +460,7 @@ struct amdgpu_vm_manager {
>   	/* PASID to VM mapping, will be used in interrupt context to
>   	 * look up VM of a page fault
>   	 */
> -	struct xarray				pasids;
> +	struct xarray				pasids[MAX_XCP];
>   	/* Global registration of recent page fault information */
>   	struct amdgpu_vm_fault_info	fault_info;
>   };
> @@ -550,7 +554,7 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
>   void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev);
>   
>   struct amdgpu_task_info *
> -amdgpu_vm_get_task_info_pasid(struct amdgpu_device *adev, u32 pasid);
> +amdgpu_vm_get_task_info_pasid(struct amdgpu_device *adev, u32 pasid, u32 xcp_id);
>   
>   struct amdgpu_task_info *
>   amdgpu_vm_get_task_info_vm(struct amdgpu_vm *vm);
> @@ -649,7 +653,8 @@ void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev,
>   				  unsigned int pasid,
>   				  uint64_t addr,
>   				  uint32_t status,
> -				  unsigned int vmhub);
> +				  unsigned int vmhub,
> +				  uint32_t xcp_id);
>   void amdgpu_vm_tlb_fence_create(struct amdgpu_device *adev,
>   				 struct amdgpu_vm *vm,
>   				 struct dma_fence **fence);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index f0ceab3ce5bf..24b042febf5c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -151,7 +151,8 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
>   		WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
>   
>   		amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status,
> -					     entry->vmid_src ? AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0));
> +					     entry->vmid_src ? AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0),
> +					     0);
>   	}
>   
>   	if (!printk_ratelimit())
> @@ -161,7 +162,7 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
>   		"[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n",
>   		entry->vmid_src ? "mmhub" : "gfxhub",
>   		entry->src_id, entry->ring_id, entry->vmid, entry->pasid);
> -	task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
> +	task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid, 0);
>   	if (task_info) {
>   		dev_err(adev->dev,
>   			" in process %s pid %d thread %s pid %d\n",
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
> index 2797fd84432b..3507046d33e6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
> @@ -122,7 +122,8 @@ static int gmc_v11_0_process_interrupt(struct amdgpu_device *adev,
>   		WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
>   
>   		amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status,
> -					     entry->vmid_src ? AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0));
> +					     entry->vmid_src ? AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0),
> +					     0);
>   	}
>   
>   	if (printk_ratelimit()) {
> @@ -132,7 +133,7 @@ static int gmc_v11_0_process_interrupt(struct amdgpu_device *adev,
>   			"[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n",
>   			entry->vmid_src ? "mmhub" : "gfxhub",
>   			entry->src_id, entry->ring_id, entry->vmid, entry->pasid);
> -		task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
> +		task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid, 0);
>   		if (task_info) {
>   			dev_err(adev->dev,
>   				" in process %s pid %d thread %s pid %d)\n",
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
> index 60acf676000b..9844564c6c74 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
> @@ -115,7 +115,8 @@ static int gmc_v12_0_process_interrupt(struct amdgpu_device *adev,
>   		WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
>   
>   		amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status,
> -					     entry->vmid_src ? AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0));
> +					     entry->vmid_src ? AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0),
> +					     0);
>   	}
>   
>   	if (printk_ratelimit()) {
> @@ -125,7 +126,7 @@ static int gmc_v12_0_process_interrupt(struct amdgpu_device *adev,
>   			"[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n",
>   			entry->vmid_src ? "mmhub" : "gfxhub",
>   			entry->src_id, entry->ring_id, entry->vmid, entry->pasid);
> -		task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
> +		task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid, 0);
>   		if (task_info) {
>   			dev_err(adev->dev,
>   				" in process %s pid %d thread %s pid %d)\n",
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> index 994432fb57ea..2cdb0cbb7c4d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> @@ -1268,7 +1268,8 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
>   		return 0;
>   
>   	amdgpu_vm_update_fault_cache(adev, entry->pasid,
> -				     ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT, status, AMDGPU_GFXHUB(0));
> +				     ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT, status,
> +				     AMDGPU_GFXHUB(0), 0);
>   
>   	if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_FIRST)
>   		gmc_v7_0_set_fault_enable_default(adev, false);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> index 86488c052f82..6855caeb7f74 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> @@ -1437,7 +1437,8 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
>   		return 0;
>   
>   	amdgpu_vm_update_fault_cache(adev, entry->pasid,
> -				     ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT, status, AMDGPU_GFXHUB(0));
> +				     ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT, status,
> +				     AMDGPU_GFXHUB(0), 0);
>   
>   	if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_FIRST)
>   		gmc_v8_0_set_fault_enable_default(adev, false);
> @@ -1448,7 +1449,7 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
>   		dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n",
>   			entry->src_id, entry->src_data[0]);
>   
> -		task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
> +		task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid, 0);
>   		if (task_info) {
>   			dev_err(adev->dev, " for process %s pid %d thread %s pid %d\n",
>   				task_info->process_name, task_info->tgid,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index b73136d390cc..e183e08b2c02 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -556,10 +556,12 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
>   	unsigned int vmhub;
>   	u64 addr;
>   	uint32_t cam_index = 0;
> -	int ret, xcc_id = 0;
> -	uint32_t node_id;
> +	int ret;
> +	uint32_t node_id, xcc_id, xcp_id;
>   
>   	node_id = entry->node_id;
> +	xcc_id = amdgpu_amdkfd_node_id_to_xcc_id(adev, node_id);
> +	xcp_id = xcc_id/adev->gfx.num_xcc_per_xcp;
>   
>   	addr = (u64)entry->src_data[0] << 12;
>   	addr |= ((u64)entry->src_data[1] & 0xf) << 44;
> @@ -572,12 +574,6 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
>   		vmhub = AMDGPU_MMHUB1(0);
>   	} else {
>   		hub_name = "gfxhub0";
> -		if (adev->gfx.funcs->ih_node_to_logical_xcc) {
> -			xcc_id = adev->gfx.funcs->ih_node_to_logical_xcc(adev,
> -				node_id);
> -			if (xcc_id < 0)
> -				xcc_id = 0;
> -		}
>   		vmhub = xcc_id;
>   	}
>   	hub = &adev->vmhub[vmhub];
> @@ -631,7 +627,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
>   		retry_fault ? "retry" : "no-retry",
>   		entry->src_id, entry->ring_id, entry->vmid, entry->pasid);
>   
> -	task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
> +	task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid, xcp_id);
>   	if (task_info) {
>   		dev_err(adev->dev,
>   			" for process %s pid %d thread %s pid %d)\n",
> @@ -675,7 +671,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
>   	if (!amdgpu_sriov_vf(adev))
>   		WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
>   
> -	amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status, vmhub);
> +	amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status, vmhub, xcp_id);
>   
>   	dev_err(adev->dev,
>   		"VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> index 23ef4eb36b40..1ac4224bbe5b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> @@ -2182,7 +2182,7 @@ static int sdma_v4_0_print_iv_entry(struct amdgpu_device *adev,
>   			   instance, addr, entry->src_id, entry->ring_id, entry->vmid,
>   			   entry->pasid);
>   
> -	task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
> +	task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid, 0);
>   	if (task_info) {
>   		dev_dbg_ratelimited(adev->dev,
>   				    " for process %s pid %d thread %s pid %d\n",
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> index 57f16c09abfc..c8b5c0302ca7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> @@ -1683,6 +1683,8 @@ static int sdma_v4_4_2_print_iv_entry(struct amdgpu_device *adev,
>   	int instance;
>   	struct amdgpu_task_info *task_info;
>   	u64 addr;
> +	uint32_t xcc_id = amdgpu_amdkfd_node_id_to_xcc_id(adev, entry->node_id);
> +	uint32_t xcp_id = xcc_id/adev->gfx.num_xcc_per_xcp;
>   
>   	instance = sdma_v4_4_2_irq_id_to_seq(adev, entry->client_id);
>   	if (instance < 0 || instance >= adev->sdma.num_instances) {
> @@ -1698,7 +1700,7 @@ static int sdma_v4_4_2_print_iv_entry(struct amdgpu_device *adev,
>   			    instance, addr, entry->src_id, entry->ring_id, entry->vmid,
>   			    entry->pasid);
>   
> -	task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
> +	task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid, xcp_id);
>   	if (task_info) {
>   		dev_dbg_ratelimited(adev->dev, " for process %s pid %d thread %s pid %d\n",
>   				    task_info->process_name, task_info->tgid,
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> index ea3792249209..c098fbaf0e1c 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> @@ -1262,8 +1262,9 @@ void kfd_signal_reset_event(struct kfd_node *dev)
>   
>   		if (dev->dqm->detect_hang_count) {
>   			struct amdgpu_task_info *ti;
> +			uint32_t xcp_id = dev->xcp ? dev->xcp->id : 0;
>   
> -			ti = amdgpu_vm_get_task_info_pasid(dev->adev, p->pasid);
> +			ti = amdgpu_vm_get_task_info_pasid(dev->adev, p->pasid, xcp_id);
>   			if (ti) {
>   				dev_err(dev->adev->dev,
>   					"Queues reset on process %s tid %d thread %s pid %d\n",
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
> index 8e0d0356e810..d7cbf9525698 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
> @@ -377,12 +377,8 @@ static void event_interrupt_wq_v10(struct kfd_node *dev,
>   		struct kfd_hsa_memory_exception_data exception_data;
>   
>   		/* gfxhub */
> -		if (!vmid_type && dev->adev->gfx.funcs->ih_node_to_logical_xcc) {
> -			hub_inst = dev->adev->gfx.funcs->ih_node_to_logical_xcc(dev->adev,
> -				node_id);
> -			if (hub_inst < 0)
> -				hub_inst = 0;
> -		}
> +		if (!vmid_type)
> +			hub_inst = amdgpu_amdkfd_node_id_to_xcc_id(dev->adev, node_id);
>   
>   		/* mmhub */
>   		if (vmid_type && client_id == SOC15_IH_CLIENTID_VMC)
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
> index a9c3580be8c9..4708b8c811a5 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
> @@ -437,12 +437,8 @@ static void event_interrupt_wq_v9(struct kfd_node *dev,
>   		struct kfd_hsa_memory_exception_data exception_data;
>   
>   		/* gfxhub */
> -		if (!vmid_type && dev->adev->gfx.funcs->ih_node_to_logical_xcc) {
> -			hub_inst = dev->adev->gfx.funcs->ih_node_to_logical_xcc(dev->adev,
> -				node_id);
> -			if (hub_inst < 0)
> -				hub_inst = 0;
> -		}
> +		if (!vmid_type)
> +			hub_inst = amdgpu_amdkfd_node_id_to_xcc_id(dev->adev, node_id);
>   
>   		/* mmhub */
>   		if (vmid_type && client_id == SOC15_IH_CLIENTID_VMC)
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
> index ea6a8e43bd5b..b5f2f5b1069c 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
> @@ -251,8 +251,9 @@ void kfd_smi_event_update_thermal_throttling(struct kfd_node *dev,
>   void kfd_smi_event_update_vmfault(struct kfd_node *dev, uint16_t pasid)
>   {
>   	struct amdgpu_task_info *task_info;
> +	uint32_t xcp_id = dev->xcp ? dev->xcp->id : 0;
>   
> -	task_info = amdgpu_vm_get_task_info_pasid(dev->adev, pasid);
> +	task_info = amdgpu_vm_get_task_info_pasid(dev->adev, pasid, xcp_id);
>   	if (task_info) {
>   		/* Report VM faults from user applications, not retry from kernel */
>   		if (task_info->pid)


^ permalink raw reply	[flat|nested] 11+ messages in thread

* RE: [PATCH] drm/amdkfd: fix vm-pasid lookup for multiple partitions
  2024-09-05 14:23 ` Christian König
@ 2024-09-09 16:02   ` Kim, Jonathan
  2024-09-09 18:46     ` Christian König
  0 siblings, 1 reply; 11+ messages in thread
From: Kim, Jonathan @ 2024-09-09 16:02 UTC (permalink / raw)
  To: Christian König, amd-gfx@lists.freedesktop.org
  Cc: Kuehling, Felix, Deucher, Alexander, Joshi, Mukul

[Public]

> -----Original Message-----
> From: Christian König <ckoenig.leichtzumerken@gmail.com>
> Sent: Thursday, September 5, 2024 10:24 AM
> To: Kim, Jonathan <Jonathan.Kim@amd.com>; amd-gfx@lists.freedesktop.org
> Cc: Kuehling, Felix <Felix.Kuehling@amd.com>; Deucher, Alexander
> <Alexander.Deucher@amd.com>; Joshi, Mukul <Mukul.Joshi@amd.com>
> Subject: Re: [PATCH] drm/amdkfd: fix vm-pasid lookup for multiple partitions
>
> Caution: This message originated from an External Source. Use proper caution
> when opening attachments, clicking links, or responding.
>
>
> Am 19.08.24 um 19:59 schrieb Jonathan Kim:
> > Currently multiple partitions will incorrectly overwrite the VM lookup
> > table since the table is indexed by PASID and multiple partitions can
> > register different VM objects on the same PASID.
>
> That's a rather bad idea. Why do we have the same PASID for different VM
> objects in the first place?

Alex can probably elaborate on the KGD side, but from what I can see, the KMS driver open call has always assigned a new VM object per PASID on an open call.
The KFD acquires and replaces the KGD PASID-VMID registration on its own compute process open/creation call.
If this is the bad idea you're referring to, then someone else will have to chime in.  I don't have much history on this unfortunately.

That aside, the current problem is that all KFD device structures are logical partitions and register their PASID-VM binding using this concept of a device.
On the KGD side however, the registration table is maintained in the adev struct, which is a physical socket.
So there's a mismatch in understanding of what a device is between the KFD & KGD with regard to the lookup table that results in bad bindings.

Adding a per-partition dimension to the existing lookup table resolves issues we're seeing, for example, with memory violation interception and XNACK, i.e. bad bindings result in the wrong VM object being found when setting no-retry flags on memory violations.

Jon

>
> Regards,
> Christian.
>
> >
> > This results in loading the wrong VM object on PASID query.
> >
> > To correct this, setup the lookup table to be per-partition-per-PASID
> > instead.
> >
> > Signed-off-by: Jonathan Kim <jonathan.kim@amd.com>
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c    | 12 ++++
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |  1 +
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_job.c       |  4 +-
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c       |  7 ++-
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        | 55 +++++++++++------
> --
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h        | 11 +++-
> >   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c        |  5 +-
> >   drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c        |  5 +-
> >   drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c        |  5 +-
> >   drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c         |  3 +-
> >   drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c         |  5 +-
> >   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c         | 16 ++----
> >   drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c        |  2 +-
> >   drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c      |  4 +-
> >   drivers/gpu/drm/amd/amdkfd/kfd_events.c       |  3 +-
> >   .../gpu/drm/amd/amdkfd/kfd_int_process_v10.c  |  8 +--
> >   .../gpu/drm/amd/amdkfd/kfd_int_process_v9.c   |  8 +--
> >   drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c   |  3 +-
> >   18 files changed, 92 insertions(+), 65 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> > index c272461d70a9..28db789610e1 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> > @@ -887,3 +887,15 @@ int amdgpu_amdkfd_unmap_hiq(struct
> amdgpu_device *adev, u32 doorbell_off,
> >
> >       return r;
> >   }
> > +
> > +int amdgpu_amdkfd_node_id_to_xcc_id(struct amdgpu_device *adev,
> uint32_t node_id)
> > +{
> > +     if (adev->gfx.funcs->ih_node_to_logical_xcc) {
> > +             int xcc_id = adev->gfx.funcs->ih_node_to_logical_xcc(adev, node_id);
> > +
> > +             if (xcc_id >= 0)
> > +                     return xcc_id;
> > +     }
> > +
> > +     return 0;
> > +}
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> > index 4ed49265c764..bf8bb45d8ab6 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> > @@ -356,6 +356,7 @@ void amdgpu_amdkfd_unreserve_mem_limit(struct
> amdgpu_device *adev,
> >               uint64_t size, u32 alloc_flag, int8_t xcp_id);
> >
> >   u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device *adev, int
> xcp_id);
> > +int amdgpu_amdkfd_node_id_to_xcc_id(struct amdgpu_device *adev,
> uint32_t node_id);
> >
> >   #define KFD_XCP_MEM_ID(adev, xcp_id) \
> >               ((adev)->xcp_mgr && (xcp_id) >= 0 ?\
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> > index c6a1783fc9ef..bf9f8802e18d 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> > @@ -37,7 +37,7 @@ static enum drm_gpu_sched_stat
> amdgpu_job_timedout(struct drm_sched_job *s_job)
> >       struct amdgpu_job *job = to_amdgpu_job(s_job);
> >       struct amdgpu_task_info *ti;
> >       struct amdgpu_device *adev = ring->adev;
> > -     int idx;
> > +     int idx, xcp_id = !job->vm ? 0 : job->vm->xcp_id;
> >       int r;
> >
> >       if (!drm_dev_enter(adev_to_drm(adev), &idx)) {
> > @@ -62,7 +62,7 @@ static enum drm_gpu_sched_stat
> amdgpu_job_timedout(struct drm_sched_job *s_job)
> >               job->base.sched->name, atomic_read(&ring->fence_drv.last_seq),
> >               ring->fence_drv.sync_seq);
> >
> > -     ti = amdgpu_vm_get_task_info_pasid(ring->adev, job->pasid);
> > +     ti = amdgpu_vm_get_task_info_pasid(ring->adev, job->pasid, xcp_id);
> >       if (ti) {
> >               dev_err(adev->dev,
> >                       "Process information: process %s pid %d thread %s pid %d\n",
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > index d9fde38f6ee2..e413bf4a3e84 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > @@ -1275,17 +1275,20 @@ int amdgpu_info_ioctl(struct drm_device *dev,
> void *data, struct drm_file *filp)
> >               struct amdgpu_vm *vm = &fpriv->vm;
> >               struct drm_amdgpu_info_gpuvm_fault gpuvm_fault;
> >               unsigned long flags;
> > +             int i;
> >
> >               if (!vm)
> >                       return -EINVAL;
> >
> >               memset(&gpuvm_fault, 0, sizeof(gpuvm_fault));
> >
> > -             xa_lock_irqsave(&adev->vm_manager.pasids, flags);
> > +             for (i = 0; i < adev->xcp_mgr->num_xcps; i++)
> > +                     xa_lock_irqsave(&adev->vm_manager.pasids[i], flags);
> >               gpuvm_fault.addr = vm->fault_info.addr;
> >               gpuvm_fault.status = vm->fault_info.status;
> >               gpuvm_fault.vmhub = vm->fault_info.vmhub;
> > -             xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
> > +             for (i = 0; i < adev->xcp_mgr->num_xcps; i++)
> > +                     xa_unlock_irqrestore(&adev->vm_manager.pasids[i], flags);
> >
> >               return copy_to_user(out, &gpuvm_fault,
> >                                   min((size_t)size, sizeof(gpuvm_fault))) ? -EFAULT : 0;
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > index bcb729094521..f43e1c15f423 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > @@ -146,7 +146,7 @@ int amdgpu_vm_set_pasid(struct amdgpu_device
> *adev, struct amdgpu_vm *vm,
> >               return 0;
> >
> >       if (vm->pasid) {
> > -             r = xa_err(xa_erase_irq(&adev->vm_manager.pasids, vm->pasid));
> > +             r = xa_err(xa_erase_irq(&adev->vm_manager.pasids[vm->xcp_id],
> vm->pasid));
> >               if (r < 0)
> >                       return r;
> >
> > @@ -154,7 +154,7 @@ int amdgpu_vm_set_pasid(struct amdgpu_device
> *adev, struct amdgpu_vm *vm,
> >       }
> >
> >       if (pasid) {
> > -             r = xa_err(xa_store_irq(&adev->vm_manager.pasids, pasid, vm,
> > +             r = xa_err(xa_store_irq(&adev->vm_manager.pasids[vm->xcp_id],
> pasid, vm,
> >                                       GFP_KERNEL));
> >               if (r < 0)
> >                       return r;
> > @@ -2288,14 +2288,14 @@ static void
> amdgpu_vm_destroy_task_info(struct kref *kref)
> >   }
> >
> >   static inline struct amdgpu_vm *
> > -amdgpu_vm_get_vm_from_pasid(struct amdgpu_device *adev, u32 pasid)
> > +amdgpu_vm_get_vm_from_pasid(struct amdgpu_device *adev, u32 pasid,
> u32 xcp_id)
> >   {
> >       struct amdgpu_vm *vm;
> >       unsigned long flags;
> >
> > -     xa_lock_irqsave(&adev->vm_manager.pasids, flags);
> > -     vm = xa_load(&adev->vm_manager.pasids, pasid);
> > -     xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
> > +     xa_lock_irqsave(&adev->vm_manager.pasids[xcp_id], flags);
> > +     vm = xa_load(&adev->vm_manager.pasids[xcp_id], pasid);
> > +     xa_unlock_irqrestore(&adev->vm_manager.pasids[xcp_id], flags);
> >
> >       return vm;
> >   }
> > @@ -2343,10 +2343,10 @@ amdgpu_vm_get_task_info_vm(struct
> amdgpu_vm *vm)
> >    * referenced down with amdgpu_vm_put_task_info.
> >    */
> >   struct amdgpu_task_info *
> > -amdgpu_vm_get_task_info_pasid(struct amdgpu_device *adev, u32 pasid)
> > +amdgpu_vm_get_task_info_pasid(struct amdgpu_device *adev, u32 pasid,
> u32 xcp_id)
> >   {
> >       return amdgpu_vm_get_task_info_vm(
> > -                     amdgpu_vm_get_vm_from_pasid(adev, pasid));
> > +                     amdgpu_vm_get_vm_from_pasid(adev, pasid, xcp_id));
> >   }
> >
> >   static int amdgpu_vm_create_task_info(struct amdgpu_vm *vm)
> > @@ -2481,6 +2481,8 @@ int amdgpu_vm_init(struct amdgpu_device
> *adev, struct amdgpu_vm *vm,
> >       amdgpu_bo_unreserve(vm->root.bo);
> >       amdgpu_bo_unref(&root_bo);
> >
> > +     vm->xcp_id = xcp_id < 0 ? 0 : xcp_id;
> > +
> >       return 0;
> >
> >   error_free_root:
> > @@ -2695,8 +2697,8 @@ void amdgpu_vm_manager_init(struct
> amdgpu_device *adev)
> >   #else
> >       adev->vm_manager.vm_update_mode = 0;
> >   #endif
> > -
> > -     xa_init_flags(&adev->vm_manager.pasids, XA_FLAGS_LOCK_IRQ);
> > +     for (i = 0; i < MAX_XCP; i++)
> > +             xa_init_flags(&(adev->vm_manager.pasids[i]),
> XA_FLAGS_LOCK_IRQ);
> >   }
> >
> >   /**
> > @@ -2708,10 +2710,15 @@ void amdgpu_vm_manager_init(struct
> amdgpu_device *adev)
> >    */
> >   void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
> >   {
> > -     WARN_ON(!xa_empty(&adev->vm_manager.pasids));
> > -     xa_destroy(&adev->vm_manager.pasids);
> > +     int i;
> > +
> > +     for (i = 0; i < MAX_XCP; i++) {
> > +             WARN_ON(!xa_empty(&adev->vm_manager.pasids[i]));
> > +             xa_destroy(&adev->vm_manager.pasids[i]);
> > +     }
> >
> >       amdgpu_vmid_mgr_fini(adev);
> > +
> >   }
> >
> >   /**
> > @@ -2778,17 +2785,18 @@ bool amdgpu_vm_handle_fault(struct
> amdgpu_device *adev, u32 pasid,
> >       unsigned long irqflags;
> >       uint64_t value, flags;
> >       struct amdgpu_vm *vm;
> > -     int r;
> > +     int r, xcp_id;
> >
> > -     xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
> > -     vm = xa_load(&adev->vm_manager.pasids, pasid);
> > +     xcp_id = amdgpu_amdkfd_node_id_to_xcc_id(adev, node_id)/adev-
> >gfx.num_xcc_per_xcp;
> > +     xa_lock_irqsave(&adev->vm_manager.pasids[xcp_id], irqflags);
> > +     vm = xa_load(&adev->vm_manager.pasids[xcp_id], pasid);
> >       if (vm) {
> >               root = amdgpu_bo_ref(vm->root.bo);
> >               is_compute_context = vm->is_compute_context;
> >       } else {
> >               root = NULL;
> >       }
> > -     xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
> > +     xa_unlock_irqrestore(&adev->vm_manager.pasids[xcp_id], irqflags);
> >
> >       if (!root)
> >               return false;
> > @@ -2806,11 +2814,11 @@ bool amdgpu_vm_handle_fault(struct
> amdgpu_device *adev, u32 pasid,
> >               goto error_unref;
> >
> >       /* Double check that the VM still exists */
> > -     xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
> > -     vm = xa_load(&adev->vm_manager.pasids, pasid);
> > +     xa_lock_irqsave(&adev->vm_manager.pasids[xcp_id], irqflags);
> > +     vm = xa_load(&adev->vm_manager.pasids[xcp_id], pasid);
> >       if (vm && vm->root.bo != root)
> >               vm = NULL;
> > -     xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
> > +     xa_unlock_irqrestore(&adev->vm_manager.pasids[xcp_id], irqflags);
> >       if (!vm)
> >               goto error_unlock;
> >
> > @@ -2968,14 +2976,15 @@ void amdgpu_vm_update_fault_cache(struct
> amdgpu_device *adev,
> >                                 unsigned int pasid,
> >                                 uint64_t addr,
> >                                 uint32_t status,
> > -                               unsigned int vmhub)
> > +                               unsigned int vmhub,
> > +                               uint32_t xcp_id)
> >   {
> >       struct amdgpu_vm *vm;
> >       unsigned long flags;
> >
> > -     xa_lock_irqsave(&adev->vm_manager.pasids, flags);
> > +     xa_lock_irqsave(&adev->vm_manager.pasids[xcp_id], flags);
> >
> > -     vm = xa_load(&adev->vm_manager.pasids, pasid);
> > +     vm = xa_load(&adev->vm_manager.pasids[xcp_id], pasid);
> >       /* Don't update the fault cache if status is 0.  In the multiple
> >        * fault case, subsequent faults will return a 0 status which is
> >        * useless for userspace and replaces the useful fault status, so
> > @@ -3008,7 +3017,7 @@ void amdgpu_vm_update_fault_cache(struct
> amdgpu_device *adev,
> >                       WARN_ONCE(1, "Invalid vmhub %u\n", vmhub);
> >               }
> >       }
> > -     xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
> > +     xa_unlock_irqrestore(&adev->vm_manager.pasids[xcp_id], flags);
> >   }
> >
> >   /**
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> > index 046949c4b695..1499f5f731e9 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> > @@ -35,6 +35,7 @@
> >   #include "amdgpu_sync.h"
> >   #include "amdgpu_ring.h"
> >   #include "amdgpu_ids.h"
> > +#include "amdgpu_xcp.h"
> >
> >   struct drm_exec;
> >
> > @@ -418,6 +419,9 @@ struct amdgpu_vm {
> >
> >       /* cached fault info */
> >       struct amdgpu_vm_fault_info fault_info;
> > +
> > +     /* XCP ID */
> > +     int xcp_id;
> >   };
> >
> >   struct amdgpu_vm_manager {
> > @@ -456,7 +460,7 @@ struct amdgpu_vm_manager {
> >       /* PASID to VM mapping, will be used in interrupt context to
> >        * look up VM of a page fault
> >        */
> > -     struct xarray                           pasids;
> > +     struct xarray                           pasids[MAX_XCP];
> >       /* Global registration of recent page fault information */
> >       struct amdgpu_vm_fault_info     fault_info;
> >   };
> > @@ -550,7 +554,7 @@ bool amdgpu_vm_need_pipeline_sync(struct
> amdgpu_ring *ring,
> >   void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev);
> >
> >   struct amdgpu_task_info *
> > -amdgpu_vm_get_task_info_pasid(struct amdgpu_device *adev, u32 pasid);
> > +amdgpu_vm_get_task_info_pasid(struct amdgpu_device *adev, u32 pasid,
> u32 xcp_id);
> >
> >   struct amdgpu_task_info *
> >   amdgpu_vm_get_task_info_vm(struct amdgpu_vm *vm);
> > @@ -649,7 +653,8 @@ void amdgpu_vm_update_fault_cache(struct
> amdgpu_device *adev,
> >                                 unsigned int pasid,
> >                                 uint64_t addr,
> >                                 uint32_t status,
> > -                               unsigned int vmhub);
> > +                               unsigned int vmhub,
> > +                               uint32_t xcp_id);
> >   void amdgpu_vm_tlb_fence_create(struct amdgpu_device *adev,
> >                                struct amdgpu_vm *vm,
> >                                struct dma_fence **fence);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> > index f0ceab3ce5bf..24b042febf5c 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> > @@ -151,7 +151,8 @@ static int gmc_v10_0_process_interrupt(struct
> amdgpu_device *adev,
> >               WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
> >
> >               amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status,
> > -                                          entry->vmid_src ? AMDGPU_MMHUB0(0) :
> AMDGPU_GFXHUB(0));
> > +                                          entry->vmid_src ? AMDGPU_MMHUB0(0) :
> AMDGPU_GFXHUB(0),
> > +                                          0);
> >       }
> >
> >       if (!printk_ratelimit())
> > @@ -161,7 +162,7 @@ static int gmc_v10_0_process_interrupt(struct
> amdgpu_device *adev,
> >               "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n",
> >               entry->vmid_src ? "mmhub" : "gfxhub",
> >               entry->src_id, entry->ring_id, entry->vmid, entry->pasid);
> > -     task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
> > +     task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid, 0);
> >       if (task_info) {
> >               dev_err(adev->dev,
> >                       " in process %s pid %d thread %s pid %d\n",
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
> b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
> > index 2797fd84432b..3507046d33e6 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
> > @@ -122,7 +122,8 @@ static int gmc_v11_0_process_interrupt(struct
> amdgpu_device *adev,
> >               WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
> >
> >               amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status,
> > -                                          entry->vmid_src ? AMDGPU_MMHUB0(0) :
> AMDGPU_GFXHUB(0));
> > +                                          entry->vmid_src ? AMDGPU_MMHUB0(0) :
> AMDGPU_GFXHUB(0),
> > +                                          0);
> >       }
> >
> >       if (printk_ratelimit()) {
> > @@ -132,7 +133,7 @@ static int gmc_v11_0_process_interrupt(struct
> amdgpu_device *adev,
> >                       "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n",
> >                       entry->vmid_src ? "mmhub" : "gfxhub",
> >                       entry->src_id, entry->ring_id, entry->vmid, entry->pasid);
> > -             task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
> > +             task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid, 0);
> >               if (task_info) {
> >                       dev_err(adev->dev,
> >                               " in process %s pid %d thread %s pid %d)\n",
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
> b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
> > index 60acf676000b..9844564c6c74 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
> > @@ -115,7 +115,8 @@ static int gmc_v12_0_process_interrupt(struct
> amdgpu_device *adev,
> >               WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
> >
> >               amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status,
> > -                                          entry->vmid_src ? AMDGPU_MMHUB0(0) :
> AMDGPU_GFXHUB(0));
> > +                                          entry->vmid_src ? AMDGPU_MMHUB0(0) :
> AMDGPU_GFXHUB(0),
> > +                                          0);
> >       }
> >
> >       if (printk_ratelimit()) {
> > @@ -125,7 +126,7 @@ static int gmc_v12_0_process_interrupt(struct
> amdgpu_device *adev,
> >                       "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n",
> >                       entry->vmid_src ? "mmhub" : "gfxhub",
> >                       entry->src_id, entry->ring_id, entry->vmid, entry->pasid);
> > -             task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
> > +             task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid, 0);
> >               if (task_info) {
> >                       dev_err(adev->dev,
> >                               " in process %s pid %d thread %s pid %d)\n",
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> > index 994432fb57ea..2cdb0cbb7c4d 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> > @@ -1268,7 +1268,8 @@ static int gmc_v7_0_process_interrupt(struct
> amdgpu_device *adev,
> >               return 0;
> >
> >       amdgpu_vm_update_fault_cache(adev, entry->pasid,
> > -                                  ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT, status,
> AMDGPU_GFXHUB(0));
> > +                                  ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT, status,
> > +                                  AMDGPU_GFXHUB(0), 0);
> >
> >       if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_FIRST)
> >               gmc_v7_0_set_fault_enable_default(adev, false);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> > index 86488c052f82..6855caeb7f74 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> > @@ -1437,7 +1437,8 @@ static int gmc_v8_0_process_interrupt(struct
> amdgpu_device *adev,
> >               return 0;
> >
> >       amdgpu_vm_update_fault_cache(adev, entry->pasid,
> > -                                  ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT, status,
> AMDGPU_GFXHUB(0));
> > +                                  ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT, status,
> > +                                  AMDGPU_GFXHUB(0), 0);
> >
> >       if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_FIRST)
> >               gmc_v8_0_set_fault_enable_default(adev, false);
> > @@ -1448,7 +1449,7 @@ static int gmc_v8_0_process_interrupt(struct
> amdgpu_device *adev,
> >               dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n",
> >                       entry->src_id, entry->src_data[0]);
> >
> > -             task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
> > +             task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid, 0);
> >               if (task_info) {
> >                       dev_err(adev->dev, " for process %s pid %d thread %s pid %d\n",
> >                               task_info->process_name, task_info->tgid,
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> > index b73136d390cc..e183e08b2c02 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> > @@ -556,10 +556,12 @@ static int gmc_v9_0_process_interrupt(struct
> amdgpu_device *adev,
> >       unsigned int vmhub;
> >       u64 addr;
> >       uint32_t cam_index = 0;
> > -     int ret, xcc_id = 0;
> > -     uint32_t node_id;
> > +     int ret;
> > +     uint32_t node_id, xcc_id, xcp_id;
> >
> >       node_id = entry->node_id;
> > +     xcc_id = amdgpu_amdkfd_node_id_to_xcc_id(adev, node_id);
> > +     xcp_id = xcc_id/adev->gfx.num_xcc_per_xcp;
> >
> >       addr = (u64)entry->src_data[0] << 12;
> >       addr |= ((u64)entry->src_data[1] & 0xf) << 44;
> > @@ -572,12 +574,6 @@ static int gmc_v9_0_process_interrupt(struct
> amdgpu_device *adev,
> >               vmhub = AMDGPU_MMHUB1(0);
> >       } else {
> >               hub_name = "gfxhub0";
> > -             if (adev->gfx.funcs->ih_node_to_logical_xcc) {
> > -                     xcc_id = adev->gfx.funcs->ih_node_to_logical_xcc(adev,
> > -                             node_id);
> > -                     if (xcc_id < 0)
> > -                             xcc_id = 0;
> > -             }
> >               vmhub = xcc_id;
> >       }
> >       hub = &adev->vmhub[vmhub];
> > @@ -631,7 +627,7 @@ static int gmc_v9_0_process_interrupt(struct
> amdgpu_device *adev,
> >               retry_fault ? "retry" : "no-retry",
> >               entry->src_id, entry->ring_id, entry->vmid, entry->pasid);
> >
> > -     task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
> > +     task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid,
> xcp_id);
> >       if (task_info) {
> >               dev_err(adev->dev,
> >                       " for process %s pid %d thread %s pid %d)\n",
> > @@ -675,7 +671,7 @@ static int gmc_v9_0_process_interrupt(struct
> amdgpu_device *adev,
> >       if (!amdgpu_sriov_vf(adev))
> >               WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
> >
> > -     amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status,
> vmhub);
> > +     amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status,
> vmhub, xcp_id);
> >
> >       dev_err(adev->dev,
> >               "VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
> > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> > index 23ef4eb36b40..1ac4224bbe5b 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> > @@ -2182,7 +2182,7 @@ static int sdma_v4_0_print_iv_entry(struct
> amdgpu_device *adev,
> >                          instance, addr, entry->src_id, entry->ring_id, entry->vmid,
> >                          entry->pasid);
> >
> > -     task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
> > +     task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid, 0);
> >       if (task_info) {
> >               dev_dbg_ratelimited(adev->dev,
> >                                   " for process %s pid %d thread %s pid %d\n",
> > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> > index 57f16c09abfc..c8b5c0302ca7 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> > @@ -1683,6 +1683,8 @@ static int sdma_v4_4_2_print_iv_entry(struct
> amdgpu_device *adev,
> >       int instance;
> >       struct amdgpu_task_info *task_info;
> >       u64 addr;
> > +     uint32_t xcc_id = amdgpu_amdkfd_node_id_to_xcc_id(adev, entry-
> >node_id);
> > +     uint32_t xcp_id = xcc_id/adev->gfx.num_xcc_per_xcp;
> >
> >       instance = sdma_v4_4_2_irq_id_to_seq(adev, entry->client_id);
> >       if (instance < 0 || instance >= adev->sdma.num_instances) {
> > @@ -1698,7 +1700,7 @@ static int sdma_v4_4_2_print_iv_entry(struct
> amdgpu_device *adev,
> >                           instance, addr, entry->src_id, entry->ring_id, entry->vmid,
> >                           entry->pasid);
> >
> > -     task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
> > +     task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid,
> xcp_id);
> >       if (task_info) {
> >               dev_dbg_ratelimited(adev->dev, " for process %s pid %d thread %s
> pid %d\n",
> >                                   task_info->process_name, task_info->tgid,
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> > index ea3792249209..c098fbaf0e1c 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> > @@ -1262,8 +1262,9 @@ void kfd_signal_reset_event(struct kfd_node
> *dev)
> >
> >               if (dev->dqm->detect_hang_count) {
> >                       struct amdgpu_task_info *ti;
> > +                     uint32_t xcp_id = dev->xcp ? dev->xcp->id : 0;
> >
> > -                     ti = amdgpu_vm_get_task_info_pasid(dev->adev, p->pasid);
> > +                     ti = amdgpu_vm_get_task_info_pasid(dev->adev, p->pasid,
> xcp_id);
> >                       if (ti) {
> >                               dev_err(dev->adev->dev,
> >                                       "Queues reset on process %s tid %d thread %s pid %d\n",
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
> > index 8e0d0356e810..d7cbf9525698 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
> > @@ -377,12 +377,8 @@ static void event_interrupt_wq_v10(struct
> kfd_node *dev,
> >               struct kfd_hsa_memory_exception_data exception_data;
> >
> >               /* gfxhub */
> > -             if (!vmid_type && dev->adev->gfx.funcs->ih_node_to_logical_xcc) {
> > -                     hub_inst = dev->adev->gfx.funcs->ih_node_to_logical_xcc(dev-
> >adev,
> > -                             node_id);
> > -                     if (hub_inst < 0)
> > -                             hub_inst = 0;
> > -             }
> > +             if (!vmid_type)
> > +                     hub_inst = amdgpu_amdkfd_node_id_to_xcc_id(dev->adev,
> node_id);
> >
> >               /* mmhub */
> >               if (vmid_type && client_id == SOC15_IH_CLIENTID_VMC)
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
> > index a9c3580be8c9..4708b8c811a5 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
> > @@ -437,12 +437,8 @@ static void event_interrupt_wq_v9(struct
> kfd_node *dev,
> >               struct kfd_hsa_memory_exception_data exception_data;
> >
> >               /* gfxhub */
> > -             if (!vmid_type && dev->adev->gfx.funcs->ih_node_to_logical_xcc) {
> > -                     hub_inst = dev->adev->gfx.funcs->ih_node_to_logical_xcc(dev-
> >adev,
> > -                             node_id);
> > -                     if (hub_inst < 0)
> > -                             hub_inst = 0;
> > -             }
> > +             if (!vmid_type)
> > +                     hub_inst = amdgpu_amdkfd_node_id_to_xcc_id(dev->adev,
> node_id);
> >
> >               /* mmhub */
> >               if (vmid_type && client_id == SOC15_IH_CLIENTID_VMC)
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
> > index ea6a8e43bd5b..b5f2f5b1069c 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
> > @@ -251,8 +251,9 @@ void
> kfd_smi_event_update_thermal_throttling(struct kfd_node *dev,
> >   void kfd_smi_event_update_vmfault(struct kfd_node *dev, uint16_t pasid)
> >   {
> >       struct amdgpu_task_info *task_info;
> > +     uint32_t xcp_id = dev->xcp ? dev->xcp->id : 0;
> >
> > -     task_info = amdgpu_vm_get_task_info_pasid(dev->adev, pasid);
> > +     task_info = amdgpu_vm_get_task_info_pasid(dev->adev, pasid, xcp_id);
> >       if (task_info) {
> >               /* Report VM faults from user applications, not retry from kernel */
> >               if (task_info->pid)


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] drm/amdkfd: fix vm-pasid lookup for multiple partitions
  2024-09-09 16:02   ` Kim, Jonathan
@ 2024-09-09 18:46     ` Christian König
  2024-09-10 15:23       ` Philip Yang
  0 siblings, 1 reply; 11+ messages in thread
From: Christian König @ 2024-09-09 18:46 UTC (permalink / raw)
  To: Kim, Jonathan, amd-gfx@lists.freedesktop.org
  Cc: Kuehling, Felix, Deucher, Alexander, Joshi, Mukul

Am 09.09.24 um 18:02 schrieb Kim, Jonathan:
> [Public]
>
>> -----Original Message-----
>> From: Christian König <ckoenig.leichtzumerken@gmail.com>
>> Sent: Thursday, September 5, 2024 10:24 AM
>> To: Kim, Jonathan <Jonathan.Kim@amd.com>; amd-gfx@lists.freedesktop.org
>> Cc: Kuehling, Felix <Felix.Kuehling@amd.com>; Deucher, Alexander
>> <Alexander.Deucher@amd.com>; Joshi, Mukul <Mukul.Joshi@amd.com>
>> Subject: Re: [PATCH] drm/amdkfd: fix vm-pasid lookup for multiple partitions
>>
>> Caution: This message originated from an External Source. Use proper caution
>> when opening attachments, clicking links, or responding.
>>
>>
>> Am 19.08.24 um 19:59 schrieb Jonathan Kim:
>>> Currently multiple partitions will incorrectly overwrite the VM lookup
>>> table since the table is indexed by PASID and multiple partitions can
>>> register different VM objects on the same PASID.
>> That's a rather bad idea. Why do we have the same PASID for different VM
>> objects in the first place?
> Alex can probably elaborate on the KGD side, but from what I can see, the KMS driver open call has always assigned a new VM object per PASID on an open call.
> The KFD acquires and replaces the KGD PASID-VMID registration on its own compute process open/creation call.
> If this is the bad idea you're referring to, then someone else will have to chime in.  I don't have much history on this, unfortunately.

Yeah, Felix and I designed that.

> That aside, the current problem is that all KFD device structures are logical partitions and register their PASID-VM binding using this concept of a device.

As far as I can see that is the fundamental problem. This needs to be 
fixed instead.

> On the KGD side however, the registration table is maintained in the adev struct, which is a physical socket.
> So there's a mismatch in understanding of what a device is between the KFD & KGD with regard to the look up table that results in bad bindings.
>
> Adding a per-partition dimension to the existing lookup table resolves issues we're seeing, for example, with memory violation interception and XNACK, i.e. bad bindings result in the wrong VM object being found when setting no-retry flags on memory violations.

Yeah that is pretty much a no-go.

The PASID and how it is used is defined by the PCIe specifications. If 
we now start to assign multiple VMs to the same PASID then we are 
violating the PCIe specification.

The problems you see are most likely just the tip of the iceberg here.

Regards,
Christian.

>
> Jon
>
>> Regards,
>> Christian.
>>
>>> This results in loading the wrong VM object on PASID query.
>>>
>>> To correct this, setup the lookup table to be per-partition-per-PASID
>>> instead.
>>>
>>> Signed-off-by: Jonathan Kim <jonathan.kim@amd.com>
>>> ---
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c    | 12 ++++
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |  1 +
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_job.c       |  4 +-
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c       |  7 ++-
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        | 55 +++++++++++------
>> --
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h        | 11 +++-
>>>    drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c        |  5 +-
>>>    drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c        |  5 +-
>>>    drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c        |  5 +-
>>>    drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c         |  3 +-
>>>    drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c         |  5 +-
>>>    drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c         | 16 ++----
>>>    drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c        |  2 +-
>>>    drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c      |  4 +-
>>>    drivers/gpu/drm/amd/amdkfd/kfd_events.c       |  3 +-
>>>    .../gpu/drm/amd/amdkfd/kfd_int_process_v10.c  |  8 +--
>>>    .../gpu/drm/amd/amdkfd/kfd_int_process_v9.c   |  8 +--
>>>    drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c   |  3 +-
>>>    18 files changed, 92 insertions(+), 65 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>> index c272461d70a9..28db789610e1 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>> @@ -887,3 +887,15 @@ int amdgpu_amdkfd_unmap_hiq(struct
>> amdgpu_device *adev, u32 doorbell_off,
>>>        return r;
>>>    }
>>> +
>>> +int amdgpu_amdkfd_node_id_to_xcc_id(struct amdgpu_device *adev,
>> uint32_t node_id)
>>> +{
>>> +     if (adev->gfx.funcs->ih_node_to_logical_xcc) {
>>> +             int xcc_id = adev->gfx.funcs->ih_node_to_logical_xcc(adev, node_id);
>>> +
>>> +             if (xcc_id >= 0)
>>> +                     return xcc_id;
>>> +     }
>>> +
>>> +     return 0;
>>> +}
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>> index 4ed49265c764..bf8bb45d8ab6 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>> @@ -356,6 +356,7 @@ void amdgpu_amdkfd_unreserve_mem_limit(struct
>> amdgpu_device *adev,
>>>                uint64_t size, u32 alloc_flag, int8_t xcp_id);
>>>
>>>    u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device *adev, int
>> xcp_id);
>>> +int amdgpu_amdkfd_node_id_to_xcc_id(struct amdgpu_device *adev,
>> uint32_t node_id);
>>>    #define KFD_XCP_MEM_ID(adev, xcp_id) \
>>>                ((adev)->xcp_mgr && (xcp_id) >= 0 ?\
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>>> index c6a1783fc9ef..bf9f8802e18d 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>>> @@ -37,7 +37,7 @@ static enum drm_gpu_sched_stat
>> amdgpu_job_timedout(struct drm_sched_job *s_job)
>>>        struct amdgpu_job *job = to_amdgpu_job(s_job);
>>>        struct amdgpu_task_info *ti;
>>>        struct amdgpu_device *adev = ring->adev;
>>> -     int idx;
>>> +     int idx, xcp_id = !job->vm ? 0 : job->vm->xcp_id;
>>>        int r;
>>>
>>>        if (!drm_dev_enter(adev_to_drm(adev), &idx)) {
>>> @@ -62,7 +62,7 @@ static enum drm_gpu_sched_stat
>> amdgpu_job_timedout(struct drm_sched_job *s_job)
>>>                job->base.sched->name, atomic_read(&ring->fence_drv.last_seq),
>>>                ring->fence_drv.sync_seq);
>>>
>>> -     ti = amdgpu_vm_get_task_info_pasid(ring->adev, job->pasid);
>>> +     ti = amdgpu_vm_get_task_info_pasid(ring->adev, job->pasid, xcp_id);
>>>        if (ti) {
>>>                dev_err(adev->dev,
>>>                        "Process information: process %s pid %d thread %s pid %d\n",
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>> index d9fde38f6ee2..e413bf4a3e84 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>> @@ -1275,17 +1275,20 @@ int amdgpu_info_ioctl(struct drm_device *dev,
>> void *data, struct drm_file *filp)
>>>                struct amdgpu_vm *vm = &fpriv->vm;
>>>                struct drm_amdgpu_info_gpuvm_fault gpuvm_fault;
>>>                unsigned long flags;
>>> +             int i;
>>>
>>>                if (!vm)
>>>                        return -EINVAL;
>>>
>>>                memset(&gpuvm_fault, 0, sizeof(gpuvm_fault));
>>>
>>> -             xa_lock_irqsave(&adev->vm_manager.pasids, flags);
>>> +             for (i = 0; i < adev->xcp_mgr->num_xcps; i++)
>>> +                     xa_lock_irqsave(&adev->vm_manager.pasids[i], flags);
>>>                gpuvm_fault.addr = vm->fault_info.addr;
>>>                gpuvm_fault.status = vm->fault_info.status;
>>>                gpuvm_fault.vmhub = vm->fault_info.vmhub;
>>> -             xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
>>> +             for (i = 0; i < adev->xcp_mgr->num_xcps; i++)
>>> +                     xa_unlock_irqrestore(&adev->vm_manager.pasids[i], flags);
>>>
>>>                return copy_to_user(out, &gpuvm_fault,
>>>                                    min((size_t)size, sizeof(gpuvm_fault))) ? -EFAULT : 0;
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> index bcb729094521..f43e1c15f423 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> @@ -146,7 +146,7 @@ int amdgpu_vm_set_pasid(struct amdgpu_device
>> *adev, struct amdgpu_vm *vm,
>>>                return 0;
>>>
>>>        if (vm->pasid) {
>>> -             r = xa_err(xa_erase_irq(&adev->vm_manager.pasids, vm->pasid));
>>> +             r = xa_err(xa_erase_irq(&adev->vm_manager.pasids[vm->xcp_id],
>> vm->pasid));
>>>                if (r < 0)
>>>                        return r;
>>>
>>> @@ -154,7 +154,7 @@ int amdgpu_vm_set_pasid(struct amdgpu_device
>> *adev, struct amdgpu_vm *vm,
>>>        }
>>>
>>>        if (pasid) {
>>> -             r = xa_err(xa_store_irq(&adev->vm_manager.pasids, pasid, vm,
>>> +             r = xa_err(xa_store_irq(&adev->vm_manager.pasids[vm->xcp_id],
>> pasid, vm,
>>>                                        GFP_KERNEL));
>>>                if (r < 0)
>>>                        return r;
>>> @@ -2288,14 +2288,14 @@ static void
>> amdgpu_vm_destroy_task_info(struct kref *kref)
>>>    }
>>>
>>>    static inline struct amdgpu_vm *
>>> -amdgpu_vm_get_vm_from_pasid(struct amdgpu_device *adev, u32 pasid)
>>> +amdgpu_vm_get_vm_from_pasid(struct amdgpu_device *adev, u32 pasid,
>> u32 xcp_id)
>>>    {
>>>        struct amdgpu_vm *vm;
>>>        unsigned long flags;
>>>
>>> -     xa_lock_irqsave(&adev->vm_manager.pasids, flags);
>>> -     vm = xa_load(&adev->vm_manager.pasids, pasid);
>>> -     xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
>>> +     xa_lock_irqsave(&adev->vm_manager.pasids[xcp_id], flags);
>>> +     vm = xa_load(&adev->vm_manager.pasids[xcp_id], pasid);
>>> +     xa_unlock_irqrestore(&adev->vm_manager.pasids[xcp_id], flags);
>>>
>>>        return vm;
>>>    }
>>> @@ -2343,10 +2343,10 @@ amdgpu_vm_get_task_info_vm(struct
>> amdgpu_vm *vm)
>>>     * referenced down with amdgpu_vm_put_task_info.
>>>     */
>>>    struct amdgpu_task_info *
>>> -amdgpu_vm_get_task_info_pasid(struct amdgpu_device *adev, u32 pasid)
>>> +amdgpu_vm_get_task_info_pasid(struct amdgpu_device *adev, u32 pasid,
>> u32 xcp_id)
>>>    {
>>>        return amdgpu_vm_get_task_info_vm(
>>> -                     amdgpu_vm_get_vm_from_pasid(adev, pasid));
>>> +                     amdgpu_vm_get_vm_from_pasid(adev, pasid, xcp_id));
>>>    }
>>>
>>>    static int amdgpu_vm_create_task_info(struct amdgpu_vm *vm)
>>> @@ -2481,6 +2481,8 @@ int amdgpu_vm_init(struct amdgpu_device
>> *adev, struct amdgpu_vm *vm,
>>>        amdgpu_bo_unreserve(vm->root.bo);
>>>        amdgpu_bo_unref(&root_bo);
>>>
>>> +     vm->xcp_id = xcp_id < 0 ? 0 : xcp_id;
>>> +
>>>        return 0;
>>>
>>>    error_free_root:
>>> @@ -2695,8 +2697,8 @@ void amdgpu_vm_manager_init(struct
>> amdgpu_device *adev)
>>>    #else
>>>        adev->vm_manager.vm_update_mode = 0;
>>>    #endif
>>> -
>>> -     xa_init_flags(&adev->vm_manager.pasids, XA_FLAGS_LOCK_IRQ);
>>> +     for (i = 0; i < MAX_XCP; i++)
>>> +             xa_init_flags(&(adev->vm_manager.pasids[i]),
>> XA_FLAGS_LOCK_IRQ);
>>>    }
>>>
>>>    /**
>>> @@ -2708,10 +2710,15 @@ void amdgpu_vm_manager_init(struct
>> amdgpu_device *adev)
>>>     */
>>>    void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
>>>    {
>>> -     WARN_ON(!xa_empty(&adev->vm_manager.pasids));
>>> -     xa_destroy(&adev->vm_manager.pasids);
>>> +     int i;
>>> +
>>> +     for (i = 0; i < MAX_XCP; i++) {
>>> +             WARN_ON(!xa_empty(&adev->vm_manager.pasids[i]));
>>> +             xa_destroy(&adev->vm_manager.pasids[i]);
>>> +     }
>>>
>>>        amdgpu_vmid_mgr_fini(adev);
>>> +
>>>    }
>>>
>>>    /**
>>> @@ -2778,17 +2785,18 @@ bool amdgpu_vm_handle_fault(struct
>> amdgpu_device *adev, u32 pasid,
>>>        unsigned long irqflags;
>>>        uint64_t value, flags;
>>>        struct amdgpu_vm *vm;
>>> -     int r;
>>> +     int r, xcp_id;
>>>
>>> -     xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
>>> -     vm = xa_load(&adev->vm_manager.pasids, pasid);
>>> +     xcp_id = amdgpu_amdkfd_node_id_to_xcc_id(adev, node_id)/adev-
>>> gfx.num_xcc_per_xcp;
>>> +     xa_lock_irqsave(&adev->vm_manager.pasids[xcp_id], irqflags);
>>> +     vm = xa_load(&adev->vm_manager.pasids[xcp_id], pasid);
>>>        if (vm) {
>>>                root = amdgpu_bo_ref(vm->root.bo);
>>>                is_compute_context = vm->is_compute_context;
>>>        } else {
>>>                root = NULL;
>>>        }
>>> -     xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
>>> +     xa_unlock_irqrestore(&adev->vm_manager.pasids[xcp_id], irqflags);
>>>
>>>        if (!root)
>>>                return false;
>>> @@ -2806,11 +2814,11 @@ bool amdgpu_vm_handle_fault(struct
>> amdgpu_device *adev, u32 pasid,
>>>                goto error_unref;
>>>
>>>        /* Double check that the VM still exists */
>>> -     xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
>>> -     vm = xa_load(&adev->vm_manager.pasids, pasid);
>>> +     xa_lock_irqsave(&adev->vm_manager.pasids[xcp_id], irqflags);
>>> +     vm = xa_load(&adev->vm_manager.pasids[xcp_id], pasid);
>>>        if (vm && vm->root.bo != root)
>>>                vm = NULL;
>>> -     xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
>>> +     xa_unlock_irqrestore(&adev->vm_manager.pasids[xcp_id], irqflags);
>>>        if (!vm)
>>>                goto error_unlock;
>>>
>>> @@ -2968,14 +2976,15 @@ void amdgpu_vm_update_fault_cache(struct
>> amdgpu_device *adev,
>>>                                  unsigned int pasid,
>>>                                  uint64_t addr,
>>>                                  uint32_t status,
>>> -                               unsigned int vmhub)
>>> +                               unsigned int vmhub,
>>> +                               uint32_t xcp_id)
>>>    {
>>>        struct amdgpu_vm *vm;
>>>        unsigned long flags;
>>>
>>> -     xa_lock_irqsave(&adev->vm_manager.pasids, flags);
>>> +     xa_lock_irqsave(&adev->vm_manager.pasids[xcp_id], flags);
>>>
>>> -     vm = xa_load(&adev->vm_manager.pasids, pasid);
>>> +     vm = xa_load(&adev->vm_manager.pasids[xcp_id], pasid);
>>>        /* Don't update the fault cache if status is 0.  In the multiple
>>>         * fault case, subsequent faults will return a 0 status which is
>>>         * useless for userspace and replaces the useful fault status, so
>>> @@ -3008,7 +3017,7 @@ void amdgpu_vm_update_fault_cache(struct
>> amdgpu_device *adev,
>>>                        WARN_ONCE(1, "Invalid vmhub %u\n", vmhub);
>>>                }
>>>        }
>>> -     xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
>>> +     xa_unlock_irqrestore(&adev->vm_manager.pasids[xcp_id], flags);
>>>    }
>>>
>>>    /**
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> index 046949c4b695..1499f5f731e9 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> @@ -35,6 +35,7 @@
>>>    #include "amdgpu_sync.h"
>>>    #include "amdgpu_ring.h"
>>>    #include "amdgpu_ids.h"
>>> +#include "amdgpu_xcp.h"
>>>
>>>    struct drm_exec;
>>>
>>> @@ -418,6 +419,9 @@ struct amdgpu_vm {
>>>
>>>        /* cached fault info */
>>>        struct amdgpu_vm_fault_info fault_info;
>>> +
>>> +     /* XCP ID */
>>> +     int xcp_id;
>>>    };
>>>
>>>    struct amdgpu_vm_manager {
>>> @@ -456,7 +460,7 @@ struct amdgpu_vm_manager {
>>>        /* PASID to VM mapping, will be used in interrupt context to
>>>         * look up VM of a page fault
>>>         */
>>> -     struct xarray                           pasids;
>>> +     struct xarray                           pasids[MAX_XCP];
>>>        /* Global registration of recent page fault information */
>>>        struct amdgpu_vm_fault_info     fault_info;
>>>    };
>>> @@ -550,7 +554,7 @@ bool amdgpu_vm_need_pipeline_sync(struct
>> amdgpu_ring *ring,
>>>    void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev);
>>>
>>>    struct amdgpu_task_info *
>>> -amdgpu_vm_get_task_info_pasid(struct amdgpu_device *adev, u32 pasid);
>>> +amdgpu_vm_get_task_info_pasid(struct amdgpu_device *adev, u32 pasid,
>> u32 xcp_id);
>>>    struct amdgpu_task_info *
>>>    amdgpu_vm_get_task_info_vm(struct amdgpu_vm *vm);
>>> @@ -649,7 +653,8 @@ void amdgpu_vm_update_fault_cache(struct
>> amdgpu_device *adev,
>>>                                  unsigned int pasid,
>>>                                  uint64_t addr,
>>>                                  uint32_t status,
>>> -                               unsigned int vmhub);
>>> +                               unsigned int vmhub,
>>> +                               uint32_t xcp_id);
>>>    void amdgpu_vm_tlb_fence_create(struct amdgpu_device *adev,
>>>                                 struct amdgpu_vm *vm,
>>>                                 struct dma_fence **fence);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>>> index f0ceab3ce5bf..24b042febf5c 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>>> @@ -151,7 +151,8 @@ static int gmc_v10_0_process_interrupt(struct
>> amdgpu_device *adev,
>>>                WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
>>>
>>>                amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status,
>>> -                                          entry->vmid_src ? AMDGPU_MMHUB0(0) :
>> AMDGPU_GFXHUB(0));
>>> +                                          entry->vmid_src ? AMDGPU_MMHUB0(0) :
>> AMDGPU_GFXHUB(0),
>>> +                                          0);
>>>        }
>>>
>>>        if (!printk_ratelimit())
>>> @@ -161,7 +162,7 @@ static int gmc_v10_0_process_interrupt(struct
>> amdgpu_device *adev,
>>>                "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n",
>>>                entry->vmid_src ? "mmhub" : "gfxhub",
>>>                entry->src_id, entry->ring_id, entry->vmid, entry->pasid);
>>> -     task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
>>> +     task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid, 0);
>>>        if (task_info) {
>>>                dev_err(adev->dev,
>>>                        " in process %s pid %d thread %s pid %d\n",
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
>>> index 2797fd84432b..3507046d33e6 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
>>> @@ -122,7 +122,8 @@ static int gmc_v11_0_process_interrupt(struct
>> amdgpu_device *adev,
>>>                WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
>>>
>>>                amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status,
>>> -                                          entry->vmid_src ? AMDGPU_MMHUB0(0) :
>> AMDGPU_GFXHUB(0));
>>> +                                          entry->vmid_src ? AMDGPU_MMHUB0(0) :
>> AMDGPU_GFXHUB(0),
>>> +                                          0);
>>>        }
>>>
>>>        if (printk_ratelimit()) {
>>> @@ -132,7 +133,7 @@ static int gmc_v11_0_process_interrupt(struct
>> amdgpu_device *adev,
>>>                        "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n",
>>>                        entry->vmid_src ? "mmhub" : "gfxhub",
>>>                        entry->src_id, entry->ring_id, entry->vmid, entry->pasid);
>>> -             task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
>>> +             task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid, 0);
>>>                if (task_info) {
>>>                        dev_err(adev->dev,
>>>                                " in process %s pid %d thread %s pid %d)\n",
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
>>> index 60acf676000b..9844564c6c74 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
>>> @@ -115,7 +115,8 @@ static int gmc_v12_0_process_interrupt(struct
>> amdgpu_device *adev,
>>>                WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
>>>
>>>                amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status,
>>> -                                          entry->vmid_src ? AMDGPU_MMHUB0(0) :
>> AMDGPU_GFXHUB(0));
>>> +                                          entry->vmid_src ? AMDGPU_MMHUB0(0) :
>> AMDGPU_GFXHUB(0),
>>> +                                          0);
>>>        }
>>>
>>>        if (printk_ratelimit()) {
>>> @@ -125,7 +126,7 @@ static int gmc_v12_0_process_interrupt(struct
>> amdgpu_device *adev,
>>>                        "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n",
>>>                        entry->vmid_src ? "mmhub" : "gfxhub",
>>>                        entry->src_id, entry->ring_id, entry->vmid, entry->pasid);
>>> -             task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
>>> +             task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid, 0);
>>>                if (task_info) {
>>>                        dev_err(adev->dev,
>>>                                " in process %s pid %d thread %s pid %d)\n",
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>>> index 994432fb57ea..2cdb0cbb7c4d 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>>> @@ -1268,7 +1268,8 @@ static int gmc_v7_0_process_interrupt(struct
>> amdgpu_device *adev,
>>>                return 0;
>>>
>>>        amdgpu_vm_update_fault_cache(adev, entry->pasid,
>>> -                                  ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT, status,
>> AMDGPU_GFXHUB(0));
>>> +                                  ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT, status,
>>> +                                  AMDGPU_GFXHUB(0), 0);
>>>
>>>        if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_FIRST)
>>>                gmc_v7_0_set_fault_enable_default(adev, false);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>>> index 86488c052f82..6855caeb7f74 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>>> @@ -1437,7 +1437,8 @@ static int gmc_v8_0_process_interrupt(struct
>> amdgpu_device *adev,
>>>                return 0;
>>>
>>>        amdgpu_vm_update_fault_cache(adev, entry->pasid,
>>> -                                  ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT, status,
>> AMDGPU_GFXHUB(0));
>>> +                                  ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT, status,
>>> +                                  AMDGPU_GFXHUB(0), 0);
>>>
>>>        if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_FIRST)
>>>                gmc_v8_0_set_fault_enable_default(adev, false);
>>> @@ -1448,7 +1449,7 @@ static int gmc_v8_0_process_interrupt(struct
>> amdgpu_device *adev,
>>>                dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n",
>>>                        entry->src_id, entry->src_data[0]);
>>>
>>> -             task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
>>> +             task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid, 0);
>>>                if (task_info) {
>>>                        dev_err(adev->dev, " for process %s pid %d thread %s pid %d\n",
>>>                                task_info->process_name, task_info->tgid,
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>> index b73136d390cc..e183e08b2c02 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>> @@ -556,10 +556,12 @@ static int gmc_v9_0_process_interrupt(struct
>> amdgpu_device *adev,
>>>        unsigned int vmhub;
>>>        u64 addr;
>>>        uint32_t cam_index = 0;
>>> -     int ret, xcc_id = 0;
>>> -     uint32_t node_id;
>>> +     int ret;
>>> +     uint32_t node_id, xcc_id, xcp_id;
>>>
>>>        node_id = entry->node_id;
>>> +     xcc_id = amdgpu_amdkfd_node_id_to_xcc_id(adev, node_id);
>>> +     xcp_id = xcc_id/adev->gfx.num_xcc_per_xcp;
>>>
>>>        addr = (u64)entry->src_data[0] << 12;
>>>        addr |= ((u64)entry->src_data[1] & 0xf) << 44;
>>> @@ -572,12 +574,6 @@ static int gmc_v9_0_process_interrupt(struct
>> amdgpu_device *adev,
>>>                vmhub = AMDGPU_MMHUB1(0);
>>>        } else {
>>>                hub_name = "gfxhub0";
>>> -             if (adev->gfx.funcs->ih_node_to_logical_xcc) {
>>> -                     xcc_id = adev->gfx.funcs->ih_node_to_logical_xcc(adev,
>>> -                             node_id);
>>> -                     if (xcc_id < 0)
>>> -                             xcc_id = 0;
>>> -             }
>>>                vmhub = xcc_id;
>>>        }
>>>        hub = &adev->vmhub[vmhub];
>>> @@ -631,7 +627,7 @@ static int gmc_v9_0_process_interrupt(struct
>> amdgpu_device *adev,
>>>                retry_fault ? "retry" : "no-retry",
>>>                entry->src_id, entry->ring_id, entry->vmid, entry->pasid);
>>>
>>> -     task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
>>> +     task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid,
>> xcp_id);
>>>        if (task_info) {
>>>                dev_err(adev->dev,
>>>                        " for process %s pid %d thread %s pid %d)\n",
>>> @@ -675,7 +671,7 @@ static int gmc_v9_0_process_interrupt(struct
>> amdgpu_device *adev,
>>>        if (!amdgpu_sriov_vf(adev))
>>>                WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
>>>
>>> -     amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status,
>> vmhub);
>>> +     amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status,
>> vmhub, xcp_id);
>>>        dev_err(adev->dev,
>>>                "VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>> b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>>> index 23ef4eb36b40..1ac4224bbe5b 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>>> @@ -2182,7 +2182,7 @@ static int sdma_v4_0_print_iv_entry(struct
>> amdgpu_device *adev,
>>>                           instance, addr, entry->src_id, entry->ring_id, entry->vmid,
>>>                           entry->pasid);
>>>
>>> -     task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
>>> +     task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid, 0);
>>>        if (task_info) {
>>>                dev_dbg_ratelimited(adev->dev,
>>>                                    " for process %s pid %d thread %s pid %d\n",
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
>> b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
>>> index 57f16c09abfc..c8b5c0302ca7 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
>>> @@ -1683,6 +1683,8 @@ static int sdma_v4_4_2_print_iv_entry(struct
>> amdgpu_device *adev,
>>>        int instance;
>>>        struct amdgpu_task_info *task_info;
>>>        u64 addr;
>>> +     uint32_t xcc_id = amdgpu_amdkfd_node_id_to_xcc_id(adev, entry-
>>> node_id);
>>> +     uint32_t xcp_id = xcc_id/adev->gfx.num_xcc_per_xcp;
>>>
>>>        instance = sdma_v4_4_2_irq_id_to_seq(adev, entry->client_id);
>>>        if (instance < 0 || instance >= adev->sdma.num_instances) {
>>> @@ -1698,7 +1700,7 @@ static int sdma_v4_4_2_print_iv_entry(struct
>> amdgpu_device *adev,
>>>                            instance, addr, entry->src_id, entry->ring_id, entry->vmid,
>>>                            entry->pasid);
>>>
>>> -     task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
>>> +     task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid,
>> xcp_id);
>>>        if (task_info) {
>>>                dev_dbg_ratelimited(adev->dev, " for process %s pid %d thread %s
>> pid %d\n",
>>>                                    task_info->process_name, task_info->tgid,
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
>> b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
>>> index ea3792249209..c098fbaf0e1c 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
>>> @@ -1262,8 +1262,9 @@ void kfd_signal_reset_event(struct kfd_node
>> *dev)
>>>                if (dev->dqm->detect_hang_count) {
>>>                        struct amdgpu_task_info *ti;
>>> +                     uint32_t xcp_id = dev->xcp ? dev->xcp->id : 0;
>>>
>>> -                     ti = amdgpu_vm_get_task_info_pasid(dev->adev, p->pasid);
>>> +                     ti = amdgpu_vm_get_task_info_pasid(dev->adev, p->pasid,
>> xcp_id);
>>>                        if (ti) {
>>>                                dev_err(dev->adev->dev,
>>>                                        "Queues reset on process %s tid %d thread %s pid %d\n",
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
>> b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
>>> index 8e0d0356e810..d7cbf9525698 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
>>> @@ -377,12 +377,8 @@ static void event_interrupt_wq_v10(struct
>> kfd_node *dev,
>>>                struct kfd_hsa_memory_exception_data exception_data;
>>>
>>>                /* gfxhub */
>>> -             if (!vmid_type && dev->adev->gfx.funcs->ih_node_to_logical_xcc) {
>>> -                     hub_inst = dev->adev->gfx.funcs->ih_node_to_logical_xcc(dev-
>>> adev,
>>> -                             node_id);
>>> -                     if (hub_inst < 0)
>>> -                             hub_inst = 0;
>>> -             }
>>> +             if (!vmid_type)
>>> +                     hub_inst = amdgpu_amdkfd_node_id_to_xcc_id(dev->adev,
>> node_id);
>>>                /* mmhub */
>>>                if (vmid_type && client_id == SOC15_IH_CLIENTID_VMC)
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
>> b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
>>> index a9c3580be8c9..4708b8c811a5 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
>>> @@ -437,12 +437,8 @@ static void event_interrupt_wq_v9(struct
>> kfd_node *dev,
>>>                struct kfd_hsa_memory_exception_data exception_data;
>>>
>>>                /* gfxhub */
>>> -             if (!vmid_type && dev->adev->gfx.funcs->ih_node_to_logical_xcc) {
>>> -                     hub_inst = dev->adev->gfx.funcs->ih_node_to_logical_xcc(dev-
>>> adev,
>>> -                             node_id);
>>> -                     if (hub_inst < 0)
>>> -                             hub_inst = 0;
>>> -             }
>>> +             if (!vmid_type)
>>> +                     hub_inst = amdgpu_amdkfd_node_id_to_xcc_id(dev->adev,
>> node_id);
>>>                /* mmhub */
>>>                if (vmid_type && client_id == SOC15_IH_CLIENTID_VMC)
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
>> b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
>>> index ea6a8e43bd5b..b5f2f5b1069c 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
>>> @@ -251,8 +251,9 @@ void
>> kfd_smi_event_update_thermal_throttling(struct kfd_node *dev,
>>>    void kfd_smi_event_update_vmfault(struct kfd_node *dev, uint16_t pasid)
>>>    {
>>>        struct amdgpu_task_info *task_info;
>>> +     uint32_t xcp_id = dev->xcp ? dev->xcp->id : 0;
>>>
>>> -     task_info = amdgpu_vm_get_task_info_pasid(dev->adev, pasid);
>>> +     task_info = amdgpu_vm_get_task_info_pasid(dev->adev, pasid, xcp_id);
>>>        if (task_info) {
>>>                /* Report VM faults from user applications, not retry from kernel */
>>>                if (task_info->pid)


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] drm/amdkfd: fix vm-pasid lookup for multiple partitions
  2024-09-09 18:46     ` Christian König
@ 2024-09-10 15:23       ` Philip Yang
  2024-09-10 20:47         ` Kim, Jonathan
  0 siblings, 1 reply; 11+ messages in thread
From: Philip Yang @ 2024-09-10 15:23 UTC (permalink / raw)
  To: Christian König, Kim, Jonathan,
	amd-gfx@lists.freedesktop.org
  Cc: Kuehling, Felix, Deucher, Alexander, Joshi, Mukul

[-- Attachment #1: Type: text/html, Size: 81179 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* RE: [PATCH] drm/amdkfd: fix vm-pasid lookup for multiple partitions
  2024-09-10 15:23       ` Philip Yang
@ 2024-09-10 20:47         ` Kim, Jonathan
  2024-09-10 23:59           ` Chen, Xiaogang
  0 siblings, 1 reply; 11+ messages in thread
From: Kim, Jonathan @ 2024-09-10 20:47 UTC (permalink / raw)
  To: Yang, Philip, Koenig, Christian, amd-gfx@lists.freedesktop.org
  Cc: Kuehling, Felix, Deucher, Alexander, Joshi, Mukul

[-- Attachment #1: Type: text/plain, Size: 32122 bytes --]

[Public]

KMS open still sets the PASID-VM bindings per adev (socket), so I don’t see how the per-partition PASID overwrite issue is primarily a KFD concern.
Are you saying the KFD process device holds a shadow copy of the correct VM for page restore during a fault?
Doesn’t it acquire the wrong VM object on process init in the first place?
Even if it were the case that the KFD had a separate VM reference, the underlying IRQ fault handling is still broken.
We probably don’t want to put a bandage over this just to fix one symptom.

Jon


From: Yang, Philip <Philip.Yang@amd.com>
Sent: Tuesday, September 10, 2024 11:24 AM
To: Koenig, Christian <Christian.Koenig@amd.com>; Kim, Jonathan <Jonathan.Kim@amd.com>; amd-gfx@lists.freedesktop.org
Cc: Kuehling, Felix <Felix.Kuehling@amd.com>; Deucher, Alexander <Alexander.Deucher@amd.com>; Joshi, Mukul <Mukul.Joshi@amd.com>
Subject: Re: [PATCH] drm/amdkfd: fix vm-pasid lookup for multiple partitions



On 2024-09-09 14:46, Christian König wrote:
Am 09.09.24 um 18:02 schrieb Kim, Jonathan:

[Public]


-----Original Message-----
From: Christian König <ckoenig.leichtzumerken@gmail.com><mailto:ckoenig.leichtzumerken@gmail.com>
Sent: Thursday, September 5, 2024 10:24 AM
To: Kim, Jonathan <Jonathan.Kim@amd.com><mailto:Jonathan.Kim@amd.com>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
Cc: Kuehling, Felix <Felix.Kuehling@amd.com><mailto:Felix.Kuehling@amd.com>; Deucher, Alexander
<Alexander.Deucher@amd.com><mailto:Alexander.Deucher@amd.com>; Joshi, Mukul <Mukul.Joshi@amd.com><mailto:Mukul.Joshi@amd.com>
Subject: Re: [PATCH] drm/amdkfd: fix vm-pasid lookup for multiple partitions

Caution: This message originated from an External Source. Use proper caution
when opening attachments, clicking links, or responding.


Am 19.08.24 um 19:59 schrieb Jonathan Kim:

Currently multiple partitions will incorrectly overwrite the VM lookup
table since the table is indexed by PASID and multiple partitions can
register different VM objects on the same PASID.
That's a rather bad idea. Why do we have the same PASID for different VM
objects in the first place?
Alex can probably elaborate on the KGD side, but from what I can see, the KMS driver has always assigned a new VM object per PASID on an open call.
The KFD acquires and replaces the KGD PASID-VMID registration on its own compute process open/creation call.
If this is the bad idea you're referring to, then someone else will have to chime in.  I don't have much history on this, unfortunately.

Yeah, Felix and I designed that.

The app opens a DRM node to create a VM for each partition, with a different vm->pasid for each VM. The issue comes from kfd_ioctl_acquire_vm -> kfd_process_device_init_vm -> amdgpu_amdkfd_gpuvm_set_vm_pasid, which replaces every vm->pasid with the KFD process->pasid obtained from opening the KFD node. This ends up storing only one VM in adev->vm_manager.pasids under the KFD process PASID, so we cannot retrieve the correct VM from adev->vm_manager.pasids on mGPUs or multiple partitions.


That aside, the current problem is that all KFD device structures are logical partitions and register their PASID-VM binding using this concept of a device.

As far as I can see that is the fundamental problem. This needs to be fixed instead.


On the KGD side however, the registration table is maintained in the adev struct, which is a physical socket.
So there's a mismatch in understanding of what a device is between the KFD & KGD with regard to the look up table that results in bad bindings.

Adding a per-partition dimension to the existing lookup table resolves issues we're seeing, for example, with memory violation interception and XNACK, i.e. bad bindings result in the wrong VM object being found when setting no-retry flags on memory violations.

svm_range_restore_pages retry-fault recovery uses the fault PASID to get the KFD process and the fault node_id to get pdd->vm; maybe you can use the same approach to fix the debugger issue.

Regards,

Philip

Yeah that is pretty much a no-go.

The PASID and how it is used is defined by the PCIe specifications. If we now start to assign multiple VMs to the same PASID then we are violating the PCIe specification.

The problems you see are most likely just the tip of the iceberg here.

Regards,
Christian.



Jon


Regards,
Christian.


This results in loading the wrong VM object on PASID query.

To correct this, setup the lookup table to be per-partition-per-PASID
instead.

Signed-off-by: Jonathan Kim <jonathan.kim@amd.com><mailto:jonathan.kim@amd.com>
---
   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c    | 12 ++++
   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |  1 +
   drivers/gpu/drm/amd/amdgpu/amdgpu_job.c       |  4 +-
   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c       |  7 ++-
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        | 55 +++++++++++------
--

   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h        | 11 +++-
   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c        |  5 +-
   drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c        |  5 +-
   drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c        |  5 +-
   drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c         |  3 +-
   drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c         |  5 +-
   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c         | 16 ++----
   drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c        |  2 +-
   drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c      |  4 +-
   drivers/gpu/drm/amd/amdkfd/kfd_events.c       |  3 +-
   .../gpu/drm/amd/amdkfd/kfd_int_process_v10.c  |  8 +--
   .../gpu/drm/amd/amdkfd/kfd_int_process_v9.c   |  8 +--
   drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c   |  3 +-
   18 files changed, 92 insertions(+), 65 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c

index c272461d70a9..28db789610e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -887,3 +887,15 @@ int amdgpu_amdkfd_unmap_hiq(struct
amdgpu_device *adev, u32 doorbell_off,

       return r;
   }
+
+int amdgpu_amdkfd_node_id_to_xcc_id(struct amdgpu_device *adev,
uint32_t node_id)

+{
+     if (adev->gfx.funcs->ih_node_to_logical_xcc) {
+             int xcc_id = adev->gfx.funcs->ih_node_to_logical_xcc(adev, node_id);
+
+             if (xcc_id >= 0)
+                     return xcc_id;
+     }
+
+     return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h

index 4ed49265c764..bf8bb45d8ab6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -356,6 +356,7 @@ void amdgpu_amdkfd_unreserve_mem_limit(struct
amdgpu_device *adev,

               uint64_t size, u32 alloc_flag, int8_t xcp_id);

   u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device *adev, int
xcp_id);

+int amdgpu_amdkfd_node_id_to_xcc_id(struct amdgpu_device *adev,
uint32_t node_id);

   #define KFD_XCP_MEM_ID(adev, xcp_id) \
               ((adev)->xcp_mgr && (xcp_id) >= 0 ?\
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c

index c6a1783fc9ef..bf9f8802e18d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -37,7 +37,7 @@ static enum drm_gpu_sched_stat
amdgpu_job_timedout(struct drm_sched_job *s_job)

       struct amdgpu_job *job = to_amdgpu_job(s_job);
       struct amdgpu_task_info *ti;
       struct amdgpu_device *adev = ring->adev;
-     int idx;
+     int idx, xcp_id = !job->vm ? 0 : job->vm->xcp_id;
       int r;

       if (!drm_dev_enter(adev_to_drm(adev), &idx)) {
@@ -62,7 +62,7 @@ static enum drm_gpu_sched_stat
amdgpu_job_timedout(struct drm_sched_job *s_job)

               job->base.sched->name, atomic_read(&ring->fence_drv.last_seq),
               ring->fence_drv.sync_seq);

-     ti = amdgpu_vm_get_task_info_pasid(ring->adev, job->pasid);
+     ti = amdgpu_vm_get_task_info_pasid(ring->adev, job->pasid, xcp_id);
       if (ti) {
               dev_err(adev->dev,
                       "Process information: process %s pid %d thread %s pid %d\n",
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c

index d9fde38f6ee2..e413bf4a3e84 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -1275,17 +1275,20 @@ int amdgpu_info_ioctl(struct drm_device *dev,
void *data, struct drm_file *filp)

               struct amdgpu_vm *vm = &fpriv->vm;
               struct drm_amdgpu_info_gpuvm_fault gpuvm_fault;
               unsigned long flags;
+             int i;

               if (!vm)
                       return -EINVAL;

               memset(&gpuvm_fault, 0, sizeof(gpuvm_fault));

-             xa_lock_irqsave(&adev->vm_manager.pasids, flags);
+             for (i = 0; i < adev->xcp_mgr->num_xcps; i++)
+                     xa_lock_irqsave(&adev->vm_manager.pasids[i], flags);
               gpuvm_fault.addr = vm->fault_info.addr;
               gpuvm_fault.status = vm->fault_info.status;
               gpuvm_fault.vmhub = vm->fault_info.vmhub;
-             xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
+             for (i = 0; i < adev->xcp_mgr->num_xcps; i++)
+                     xa_unlock_irqrestore(&adev->vm_manager.pasids[i], flags);

               return copy_to_user(out, &gpuvm_fault,
                                   min((size_t)size, sizeof(gpuvm_fault))) ? -EFAULT : 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

index bcb729094521..f43e1c15f423 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -146,7 +146,7 @@ int amdgpu_vm_set_pasid(struct amdgpu_device
*adev, struct amdgpu_vm *vm,

               return 0;

       if (vm->pasid) {
-             r = xa_err(xa_erase_irq(&adev->vm_manager.pasids, vm->pasid));
+             r = xa_err(xa_erase_irq(&adev->vm_manager.pasids[vm->xcp_id],
vm->pasid));

               if (r < 0)
                       return r;

@@ -154,7 +154,7 @@ int amdgpu_vm_set_pasid(struct amdgpu_device
*adev, struct amdgpu_vm *vm,

       }

       if (pasid) {
-             r = xa_err(xa_store_irq(&adev->vm_manager.pasids, pasid, vm,
+             r = xa_err(xa_store_irq(&adev->vm_manager.pasids[vm->xcp_id],
pasid, vm,

                                       GFP_KERNEL));
               if (r < 0)
                       return r;
@@ -2288,14 +2288,14 @@ static void
amdgpu_vm_destroy_task_info(struct kref *kref)

   }

   static inline struct amdgpu_vm *
-amdgpu_vm_get_vm_from_pasid(struct amdgpu_device *adev, u32 pasid)
+amdgpu_vm_get_vm_from_pasid(struct amdgpu_device *adev, u32 pasid,
u32 xcp_id)

   {
       struct amdgpu_vm *vm;
       unsigned long flags;

-     xa_lock_irqsave(&adev->vm_manager.pasids, flags);
-     vm = xa_load(&adev->vm_manager.pasids, pasid);
-     xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
+     xa_lock_irqsave(&adev->vm_manager.pasids[xcp_id], flags);
+     vm = xa_load(&adev->vm_manager.pasids[xcp_id], pasid);
+     xa_unlock_irqrestore(&adev->vm_manager.pasids[xcp_id], flags);

       return vm;
   }
@@ -2343,10 +2343,10 @@ amdgpu_vm_get_task_info_vm(struct
amdgpu_vm *vm)

    * referenced down with amdgpu_vm_put_task_info.
    */
   struct amdgpu_task_info *
-amdgpu_vm_get_task_info_pasid(struct amdgpu_device *adev, u32 pasid)
+amdgpu_vm_get_task_info_pasid(struct amdgpu_device *adev, u32 pasid,
u32 xcp_id)

   {
       return amdgpu_vm_get_task_info_vm(
-                     amdgpu_vm_get_vm_from_pasid(adev, pasid));
+                     amdgpu_vm_get_vm_from_pasid(adev, pasid, xcp_id));
   }

   static int amdgpu_vm_create_task_info(struct amdgpu_vm *vm)
@@ -2481,6 +2481,8 @@ int amdgpu_vm_init(struct amdgpu_device
*adev, struct amdgpu_vm *vm,

       amdgpu_bo_unreserve(vm->root.bo);
       amdgpu_bo_unref(&root_bo);

+     vm->xcp_id = xcp_id < 0 ? 0 : xcp_id;
+
       return 0;

   error_free_root:
@@ -2695,8 +2697,8 @@ void amdgpu_vm_manager_init(struct
amdgpu_device *adev)

   #else
       adev->vm_manager.vm_update_mode = 0;
   #endif
-
-     xa_init_flags(&adev->vm_manager.pasids, XA_FLAGS_LOCK_IRQ);
+     for (i = 0; i < MAX_XCP; i++)
+             xa_init_flags(&(adev->vm_manager.pasids[i]),
XA_FLAGS_LOCK_IRQ);

   }

   /**
@@ -2708,10 +2710,15 @@ void amdgpu_vm_manager_init(struct
amdgpu_device *adev)

    */
   void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
   {
-     WARN_ON(!xa_empty(&adev->vm_manager.pasids));
-     xa_destroy(&adev->vm_manager.pasids);
+     int i;
+
+     for (i = 0; i < MAX_XCP; i++) {
+             WARN_ON(!xa_empty(&adev->vm_manager.pasids[i]));
+             xa_destroy(&adev->vm_manager.pasids[i]);
+     }

       amdgpu_vmid_mgr_fini(adev);
+
   }

   /**
@@ -2778,17 +2785,18 @@ bool amdgpu_vm_handle_fault(struct
amdgpu_device *adev, u32 pasid,

       unsigned long irqflags;
       uint64_t value, flags;
       struct amdgpu_vm *vm;
-     int r;
+     int r, xcp_id;

-     xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
-     vm = xa_load(&adev->vm_manager.pasids, pasid);
+     xcp_id = amdgpu_amdkfd_node_id_to_xcc_id(adev, node_id)/adev-
gfx.num_xcc_per_xcp;
+     xa_lock_irqsave(&adev->vm_manager.pasids[xcp_id], irqflags);
+     vm = xa_load(&adev->vm_manager.pasids[xcp_id], pasid);
       if (vm) {
               root = amdgpu_bo_ref(vm->root.bo);
               is_compute_context = vm->is_compute_context;
       } else {
               root = NULL;
       }
-     xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
+     xa_unlock_irqrestore(&adev->vm_manager.pasids[xcp_id], irqflags);

       if (!root)
               return false;
@@ -2806,11 +2814,11 @@ bool amdgpu_vm_handle_fault(struct
amdgpu_device *adev, u32 pasid,

               goto error_unref;

       /* Double check that the VM still exists */
-     xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
-     vm = xa_load(&adev->vm_manager.pasids, pasid);
+     xa_lock_irqsave(&adev->vm_manager.pasids[xcp_id], irqflags);
+     vm = xa_load(&adev->vm_manager.pasids[xcp_id], pasid);
       if (vm && vm->root.bo != root)
               vm = NULL;
-     xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
+     xa_unlock_irqrestore(&adev->vm_manager.pasids[xcp_id], irqflags);
       if (!vm)
               goto error_unlock;

@@ -2968,14 +2976,15 @@ void amdgpu_vm_update_fault_cache(struct
amdgpu_device *adev,

                                 unsigned int pasid,
                                 uint64_t addr,
                                 uint32_t status,
-                               unsigned int vmhub)
+                               unsigned int vmhub,
+                               uint32_t xcp_id)
   {
       struct amdgpu_vm *vm;
       unsigned long flags;

-     xa_lock_irqsave(&adev->vm_manager.pasids, flags);
+     xa_lock_irqsave(&adev->vm_manager.pasids[xcp_id], flags);

-     vm = xa_load(&adev->vm_manager.pasids, pasid);
+     vm = xa_load(&adev->vm_manager.pasids[xcp_id], pasid);
       /* Don't update the fault cache if status is 0.  In the multiple
        * fault case, subsequent faults will return a 0 status which is
        * useless for userspace and replaces the useful fault status, so
@@ -3008,7 +3017,7 @@ void amdgpu_vm_update_fault_cache(struct
amdgpu_device *adev,

                       WARN_ONCE(1, "Invalid vmhub %u\n", vmhub);
               }
       }
-     xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
+     xa_unlock_irqrestore(&adev->vm_manager.pasids[xcp_id], flags);
   }

   /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

index 046949c4b695..1499f5f731e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -35,6 +35,7 @@
   #include "amdgpu_sync.h"
   #include "amdgpu_ring.h"
   #include "amdgpu_ids.h"
+#include "amdgpu_xcp.h"

   struct drm_exec;

@@ -418,6 +419,9 @@ struct amdgpu_vm {

       /* cached fault info */
       struct amdgpu_vm_fault_info fault_info;
+
+     /* XCP ID */
+     int xcp_id;
   };

   struct amdgpu_vm_manager {
@@ -456,7 +460,7 @@ struct amdgpu_vm_manager {
       /* PASID to VM mapping, will be used in interrupt context to
        * look up VM of a page fault
        */
-     struct xarray                           pasids;
+     struct xarray                           pasids[MAX_XCP];
       /* Global registration of recent page fault information */
       struct amdgpu_vm_fault_info     fault_info;
   };
@@ -550,7 +554,7 @@ bool amdgpu_vm_need_pipeline_sync(struct
amdgpu_ring *ring,

   void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev);

   struct amdgpu_task_info *
-amdgpu_vm_get_task_info_pasid(struct amdgpu_device *adev, u32 pasid);
+amdgpu_vm_get_task_info_pasid(struct amdgpu_device *adev, u32 pasid,
u32 xcp_id);

   struct amdgpu_task_info *
   amdgpu_vm_get_task_info_vm(struct amdgpu_vm *vm);
@@ -649,7 +653,8 @@ void amdgpu_vm_update_fault_cache(struct
amdgpu_device *adev,

                                 unsigned int pasid,
                                 uint64_t addr,
                                 uint32_t status,
-                               unsigned int vmhub);
+                               unsigned int vmhub,
+                               uint32_t xcp_id);
   void amdgpu_vm_tlb_fence_create(struct amdgpu_device *adev,
                                struct amdgpu_vm *vm,
                                struct dma_fence **fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c

index f0ceab3ce5bf..24b042febf5c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -151,7 +151,8 @@ static int gmc_v10_0_process_interrupt(struct
amdgpu_device *adev,

               WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);

               amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status,
-                                          entry->vmid_src ? AMDGPU_MMHUB0(0) :
AMDGPU_GFXHUB(0));

+                                          entry->vmid_src ? AMDGPU_MMHUB0(0) :
AMDGPU_GFXHUB(0),

+                                          0);
       }

       if (!printk_ratelimit())
@@ -161,7 +162,7 @@ static int gmc_v10_0_process_interrupt(struct
amdgpu_device *adev,

               "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n",
               entry->vmid_src ? "mmhub" : "gfxhub",
               entry->src_id, entry->ring_id, entry->vmid, entry->pasid);
-     task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
+     task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid, 0);
       if (task_info) {
               dev_err(adev->dev,
                       " in process %s pid %d thread %s pid %d\n",
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c

index 2797fd84432b..3507046d33e6 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -122,7 +122,8 @@ static int gmc_v11_0_process_interrupt(struct
amdgpu_device *adev,

               WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);

               amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status,
-                                          entry->vmid_src ? AMDGPU_MMHUB0(0) :
AMDGPU_GFXHUB(0));

+                                          entry->vmid_src ? AMDGPU_MMHUB0(0) :
AMDGPU_GFXHUB(0),

+                                          0);
       }

       if (printk_ratelimit()) {
@@ -132,7 +133,7 @@ static int gmc_v11_0_process_interrupt(struct
amdgpu_device *adev,

                       "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n",
                       entry->vmid_src ? "mmhub" : "gfxhub",
                       entry->src_id, entry->ring_id, entry->vmid, entry->pasid);
-             task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
+             task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid, 0);
               if (task_info) {
                       dev_err(adev->dev,
                               " in process %s pid %d thread %s pid %d)\n",
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c

index 60acf676000b..9844564c6c74 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
@@ -115,7 +115,8 @@ static int gmc_v12_0_process_interrupt(struct
amdgpu_device *adev,

               WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);

               amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status,
-                                          entry->vmid_src ? AMDGPU_MMHUB0(0) :
AMDGPU_GFXHUB(0));

+                                          entry->vmid_src ? AMDGPU_MMHUB0(0) :
AMDGPU_GFXHUB(0),

+                                          0);
       }

       if (printk_ratelimit()) {
@@ -125,7 +126,7 @@ static int gmc_v12_0_process_interrupt(struct
amdgpu_device *adev,

                       "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n",
                       entry->vmid_src ? "mmhub" : "gfxhub",
                       entry->src_id, entry->ring_id, entry->vmid, entry->pasid);
-             task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
+             task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid, 0);
               if (task_info) {
                       dev_err(adev->dev,
                               " in process %s pid %d thread %s pid %d)\n",
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c

index 994432fb57ea..2cdb0cbb7c4d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -1268,7 +1268,8 @@ static int gmc_v7_0_process_interrupt(struct
amdgpu_device *adev,

               return 0;

       amdgpu_vm_update_fault_cache(adev, entry->pasid,
-                                  ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT, status,
AMDGPU_GFXHUB(0));

+                                  ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT, status,
+                                  AMDGPU_GFXHUB(0), 0);

       if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_FIRST)
               gmc_v7_0_set_fault_enable_default(adev, false);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c

index 86488c052f82..6855caeb7f74 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -1437,7 +1437,8 @@ static int gmc_v8_0_process_interrupt(struct
amdgpu_device *adev,

               return 0;

       amdgpu_vm_update_fault_cache(adev, entry->pasid,
-                                  ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT, status,
AMDGPU_GFXHUB(0));

+                                  ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT, status,
+                                  AMDGPU_GFXHUB(0), 0);

       if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_FIRST)
               gmc_v8_0_set_fault_enable_default(adev, false);
@@ -1448,7 +1449,7 @@ static int gmc_v8_0_process_interrupt(struct
amdgpu_device *adev,

               dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n",
                       entry->src_id, entry->src_data[0]);

-             task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
+             task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid, 0);
               if (task_info) {
                       dev_err(adev->dev, " for process %s pid %d thread %s pid %d\n",
                               task_info->process_name, task_info->tgid,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c

index b73136d390cc..e183e08b2c02 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -556,10 +556,12 @@ static int gmc_v9_0_process_interrupt(struct
amdgpu_device *adev,

       unsigned int vmhub;
       u64 addr;
       uint32_t cam_index = 0;
-     int ret, xcc_id = 0;
-     uint32_t node_id;
+     int ret;
+     uint32_t node_id, xcc_id, xcp_id;

       node_id = entry->node_id;
+     xcc_id = amdgpu_amdkfd_node_id_to_xcc_id(adev, node_id);
+     xcp_id = xcc_id/adev->gfx.num_xcc_per_xcp;

       addr = (u64)entry->src_data[0] << 12;
       addr |= ((u64)entry->src_data[1] & 0xf) << 44;
@@ -572,12 +574,6 @@ static int gmc_v9_0_process_interrupt(struct
amdgpu_device *adev,

               vmhub = AMDGPU_MMHUB1(0);
       } else {
               hub_name = "gfxhub0";
-             if (adev->gfx.funcs->ih_node_to_logical_xcc) {
-                     xcc_id = adev->gfx.funcs->ih_node_to_logical_xcc(adev,
-                             node_id);
-                     if (xcc_id < 0)
-                             xcc_id = 0;
-             }
               vmhub = xcc_id;
       }
       hub = &adev->vmhub[vmhub];
@@ -631,7 +627,7 @@ static int gmc_v9_0_process_interrupt(struct
amdgpu_device *adev,

               retry_fault ? "retry" : "no-retry",
               entry->src_id, entry->ring_id, entry->vmid, entry->pasid);

-     task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
+     task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid,
xcp_id);

       if (task_info) {
               dev_err(adev->dev,
                       " for process %s pid %d thread %s pid %d)\n",
@@ -675,7 +671,7 @@ static int gmc_v9_0_process_interrupt(struct
amdgpu_device *adev,

       if (!amdgpu_sriov_vf(adev))
               WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);

-     amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status,
vmhub);

+     amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status,
vmhub, xcp_id);

       dev_err(adev->dev,
               "VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c

index 23ef4eb36b40..1ac4224bbe5b 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -2182,7 +2182,7 @@ static int sdma_v4_0_print_iv_entry(struct
amdgpu_device *adev,

                          instance, addr, entry->src_id, entry->ring_id, entry->vmid,
                          entry->pasid);

-     task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
+     task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid, 0);
       if (task_info) {
               dev_dbg_ratelimited(adev->dev,
                                   " for process %s pid %d thread %s pid %d\n",
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c

index 57f16c09abfc..c8b5c0302ca7 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -1683,6 +1683,8 @@ static int sdma_v4_4_2_print_iv_entry(struct
amdgpu_device *adev,

       int instance;
       struct amdgpu_task_info *task_info;
       u64 addr;
+     uint32_t xcc_id = amdgpu_amdkfd_node_id_to_xcc_id(adev, entry-
node_id);
+     uint32_t xcp_id = xcc_id/adev->gfx.num_xcc_per_xcp;

       instance = sdma_v4_4_2_irq_id_to_seq(adev, entry->client_id);
       if (instance < 0 || instance >= adev->sdma.num_instances) {
@@ -1698,7 +1700,7 @@ static int sdma_v4_4_2_print_iv_entry(struct
amdgpu_device *adev,

                           instance, addr, entry->src_id, entry->ring_id, entry->vmid,
                           entry->pasid);

-     task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
+     task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid,
xcp_id);

       if (task_info) {
               dev_dbg_ratelimited(adev->dev, " for process %s pid %d thread %s
pid %d\n",

                                   task_info->process_name, task_info->tgid,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
b/drivers/gpu/drm/amd/amdkfd/kfd_events.c

index ea3792249209..c098fbaf0e1c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -1262,8 +1262,9 @@ void kfd_signal_reset_event(struct kfd_node
*dev)

               if (dev->dqm->detect_hang_count) {
                       struct amdgpu_task_info *ti;
+                     uint32_t xcp_id = dev->xcp ? dev->xcp->id : 0;

-                     ti = amdgpu_vm_get_task_info_pasid(dev->adev, p->pasid);
+                     ti = amdgpu_vm_get_task_info_pasid(dev->adev, p->pasid,
xcp_id);

                       if (ti) {
                               dev_err(dev->adev->dev,
                                       "Queues reset on process %s tid %d thread %s pid %d\n",
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c

index 8e0d0356e810..d7cbf9525698 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
@@ -377,12 +377,8 @@ static void event_interrupt_wq_v10(struct
kfd_node *dev,

               struct kfd_hsa_memory_exception_data exception_data;

               /* gfxhub */
-             if (!vmid_type && dev->adev->gfx.funcs->ih_node_to_logical_xcc) {
-                     hub_inst = dev->adev->gfx.funcs->ih_node_to_logical_xcc(dev-
adev,
-                             node_id);
-                     if (hub_inst < 0)
-                             hub_inst = 0;
-             }
+             if (!vmid_type)
+                     hub_inst = amdgpu_amdkfd_node_id_to_xcc_id(dev->adev,
node_id);

               /* mmhub */
               if (vmid_type && client_id == SOC15_IH_CLIENTID_VMC)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c

index a9c3580be8c9..4708b8c811a5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
@@ -437,12 +437,8 @@ static void event_interrupt_wq_v9(struct
kfd_node *dev,

               struct kfd_hsa_memory_exception_data exception_data;

               /* gfxhub */
-             if (!vmid_type && dev->adev->gfx.funcs->ih_node_to_logical_xcc) {
-                     hub_inst = dev->adev->gfx.funcs->ih_node_to_logical_xcc(dev-
adev,
-                             node_id);
-                     if (hub_inst < 0)
-                             hub_inst = 0;
-             }
+             if (!vmid_type)
+                     hub_inst = amdgpu_amdkfd_node_id_to_xcc_id(dev->adev,
node_id);

               /* mmhub */
               if (vmid_type && client_id == SOC15_IH_CLIENTID_VMC)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c

index ea6a8e43bd5b..b5f2f5b1069c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
@@ -251,8 +251,9 @@ void
kfd_smi_event_update_thermal_throttling(struct kfd_node *dev,

   void kfd_smi_event_update_vmfault(struct kfd_node *dev, uint16_t pasid)
   {
       struct amdgpu_task_info *task_info;
+     uint32_t xcp_id = dev->xcp ? dev->xcp->id : 0;

-     task_info = amdgpu_vm_get_task_info_pasid(dev->adev, pasid);
+     task_info = amdgpu_vm_get_task_info_pasid(dev->adev, pasid, xcp_id);
       if (task_info) {
               /* Report VM faults from user applications, not retry from kernel */
               if (task_info->pid)


[-- Attachment #2: Type: text/html, Size: 76655 bytes --]

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH] drm/amdkfd: fix vm-pasid lookup for multiple partitions
  2024-09-10 20:47         ` Kim, Jonathan
@ 2024-09-10 23:59           ` Chen, Xiaogang
  2024-09-11  6:54             ` Christian König
  0 siblings, 1 reply; 11+ messages in thread
From: Chen, Xiaogang @ 2024-09-10 23:59 UTC (permalink / raw)
  To: Kim, Jonathan, Yang, Philip, Koenig, Christian,
	amd-gfx@lists.freedesktop.org
  Cc: Kuehling, Felix, Deucher, Alexander, Joshi, Mukul

[-- Attachment #1: Type: text/plain, Size: 48382 bytes --]


You want to have a 1:1 mapping between vm and pasid so that you can query
the vm from the pasid.  I think there is a basic existing issue: we cannot
have a 1:1 correspondence between vm and pasid.

PASIDs are global address space identifiers that can be shared between 
the GPU, an IOMMU and the driver. One app should have one pasid that the 
iommu uses to decide which page table to use when the device accesses 
system resources. But one app can open a render/kfd node multiple times, 
even for one gpu. That is to say, one app could have multiple GPU vms.

I think we did not have this issue because an app usually opens a render 
node or kfd node only once. When one adev has multiple partitions there 
are multiple vms on one adev, so we have this issue.  But the root cause 
is not multiple partitions and the solution is not to introduce multiple 
pasids. I think we should have one pasid for one app and use a different 
way to get the vm from the pasid.


Regards

Xiaogang

On 9/10/2024 3:47 PM, Kim, Jonathan wrote:
>
> [Public]
>
>
>
> 	
> Caution: This message originated from an External Source. Use proper 
> caution when opening attachments, clicking links, or responding.
>
>
> [Public]
>
>
> KMS open still set per pasid-vm bindings per adev (socket) so I don’t 
> see how the per-partition overwrite PASID issue is primarily a KFD 
> concern.
>
> Are you saying the KFD process devices holds a shadow copy of the 
> correct VM during page restore during fault?
>
> Doesn’t it acquire the wrong VM object on process init in the first place?
>
> Even if it were the case the KFD had a separate VM reference, the 
> underlying IRQ fault handling is still broken.
>
> We probably don’t want to bandage over something to fix one symptom.
>
> Jon
>
> *From:*Yang, Philip <Philip.Yang@amd.com>
> *Sent:* Tuesday, September 10, 2024 11:24 AM
> *To:* Koenig, Christian <Christian.Koenig@amd.com>; Kim, Jonathan 
> <Jonathan.Kim@amd.com>; amd-gfx@lists.freedesktop.org
> *Cc:* Kuehling, Felix <Felix.Kuehling@amd.com>; Deucher, Alexander 
> <Alexander.Deucher@amd.com>; Joshi, Mukul <Mukul.Joshi@amd.com>
> *Subject:* Re: [PATCH] drm/amdkfd: fix vm-pasid lookup for multiple 
> partitions
>
> On 2024-09-09 14:46, Christian König wrote:
>
>     Am 09.09.24 um 18:02 schrieb Kim, Jonathan:
>
>         [Public]
>
>
>             -----Original Message-----
>             From: Christian König <ckoenig.leichtzumerken@gmail.com>
>             <mailto:ckoenig.leichtzumerken@gmail.com>
>             Sent: Thursday, September 5, 2024 10:24 AM
>             To: Kim, Jonathan <Jonathan.Kim@amd.com>
>             <mailto:Jonathan.Kim@amd.com>; amd-gfx@lists.freedesktop.org
>             Cc: Kuehling, Felix <Felix.Kuehling@amd.com>
>             <mailto:Felix.Kuehling@amd.com>; Deucher, Alexander
>             <Alexander.Deucher@amd.com>
>             <mailto:Alexander.Deucher@amd.com>; Joshi, Mukul
>             <Mukul.Joshi@amd.com> <mailto:Mukul.Joshi@amd.com>
>             Subject: Re: [PATCH] drm/amdkfd: fix vm-pasid lookup for
>             multiple partitions
>
>             Caution: This message originated from an External Source.
>             Use proper caution
>             when opening attachments, clicking links, or responding.
>
>
>             Am 19.08.24 um 19:59 schrieb Jonathan Kim:
>
>                 Currently multiple partitions will incorrectly
>                 overwrite the VM lookup
>                 table since the table is indexed by PASID and multiple
>                 partitions can
>                 register different VM objects on the same PASID.
>
>             That's a rather bad idea. Why do we have the same PASID
>             for different VM
>             objects in the first place?
>
>         Alex can probably elaborate on the KGD side, but from what I
>         can see, the KMS driver open call has always assigned a new VM
>         object per PASID on an open call.
>         The KFD acquires and replaces the KGD PASID-VMID registration
>         on its own compute process open/creation call.
>         If this is the bad_idea you're referring to, then someone else
>         will have to chime in.  I don't have much history on this
>         unfortunately.
>
>
>     Yeah, Felix and I designed that.
>
> app opens drm node to create vm for each partition, with different 
> vm->pasid for each vm, issue is from kfd_ioctl_acquire_vm -> 
> kfd_process_device_init_vm ->  amdgpu_amdkfd_gpuvm_set_vm_pasid, to 
> replace all vm->pasid with kfd process->pasid, which is from open kfd 
> node. This ends up to store only one vm to adev->vm_manager.pasids 
> with KFD process pasid, so we cannot retrieve correct vm from 
> adev->vm_manager.pasids on mGPUs or multiple partitions.
>
>
>
>         That aside, the current problem is that all KFD device
>         structures are logical partitions and register their PASID-VM
>         binding using this concept of a device.
>
>
>     As far as I can see that is the fundamental problem. This needs to
>     be fixed instead.
>
>
>         On the KGD side however, the registration table is maintained
>         in the adev struct, which is a physical socket.
>         So there's a mismatch in understanding of what a device is
>         between the KFD & KGD with regard to the look up table that
>         results in bad bindings.
>
>         Adding a per-partition dimension to the existing lookup table
>         resolves issues we're seeing, for example, with memory
>         violation interception and XNACK, i.e. bad bindings result in
>         the wrong vm object being found to set no-retry flags on memory
>         violations.
>
> svm_range_restore_pages retry fault recover uses fault pasid to get 
> kfd process, and use the fault node_id to get pdd->vm, maybe you can 
> use this way to fix the debugger issue.
>
> Regards,
>
> Philip
>
>
>     Yeah that is pretty much a no-go.
>
>     The PASID and how it is used is defined by the PCIe
>     specifications. If we now start to assign multiple VMs to the same
>     PASID then we are violating the PCIe specification.
>
>     The problems you see are most likely just the tip of the iceberg
>     here.
>
>     Regards,
>     Christian.
>
>
>
>         Jon
>
>
>             Regards,
>             Christian.
>
>
>                 This results in loading the wrong VM object on PASID
>                 query.
>
>                 To correct this, setup the lookup table to be
>                 per-partition-per-PASID
>                 instead.
>
>                 Signed-off-by: Jonathan Kim <jonathan.kim@amd.com>
>                 <mailto:jonathan.kim@amd.com>
>                 ---
>                 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 12 ++++
>                 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |  1 +
>                 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c |  4 +-
>                 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |  7 ++-
>                 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 55
>                 +++++++++++------
>
>             -- 
>
>                 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 11 +++-
>                 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c |  5 +-
>                 drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c |  5 +-
>                 drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c |  5 +-
>                 drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c |  3 +-
>                 drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c |  5 +-
>                 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 16 ++----
>                 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c |  2 +-
>                 drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c |  4 +-
>                 drivers/gpu/drm/amd/amdkfd/kfd_events.c |  3 +-
>                 .../gpu/drm/amd/amdkfd/kfd_int_process_v10.c |  8 +--
>                 .../gpu/drm/amd/amdkfd/kfd_int_process_v9.c |  8 +--
>                 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c |  3 +-
>                    18 files changed, 92 insertions(+), 65 deletions(-)
>
>                 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>
>             b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>
>                 index c272461d70a9..28db789610e1 100644
>                 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>                 +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>                 @@ -887,3 +887,15 @@ int amdgpu_amdkfd_unmap_hiq(struct
>
>             amdgpu_device *adev, u32 doorbell_off,
>
>                        return r;
>                    }
>                 +
>                 +int amdgpu_amdkfd_node_id_to_xcc_id(struct
>                 amdgpu_device *adev,
>
>             uint32_t node_id)
>
>                 +{
>                 +     if (adev->gfx.funcs->ih_node_to_logical_xcc) {
>                 +             int xcc_id =
>                 adev->gfx.funcs->ih_node_to_logical_xcc(adev, node_id);
>                 +
>                 +             if (xcc_id >= 0)
>                 +                     return xcc_id;
>                 +     }
>                 +
>                 +     return 0;
>                 +}
>                 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>
>             b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>
>                 index 4ed49265c764..bf8bb45d8ab6 100644
>                 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>                 +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>                 @@ -356,6 +356,7 @@ void
>                 amdgpu_amdkfd_unreserve_mem_limit(struct
>
>             amdgpu_device *adev,
>
>                                uint64_t size, u32 alloc_flag, int8_t
>                 xcp_id);
>
>                    u64 amdgpu_amdkfd_xcp_memory_size(struct
>                 amdgpu_device *adev, int
>
>             xcp_id);
>
>                 +int amdgpu_amdkfd_node_id_to_xcc_id(struct
>                 amdgpu_device *adev,
>
>             uint32_t node_id);
>
>                    #define KFD_XCP_MEM_ID(adev, xcp_id) \
>                                ((adev)->xcp_mgr && (xcp_id) >= 0 ?\
>                 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>
>             b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>
>                 index c6a1783fc9ef..bf9f8802e18d 100644
>                 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>                 +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>                 @@ -37,7 +37,7 @@ static enum drm_gpu_sched_stat
>
>             amdgpu_job_timedout(struct drm_sched_job *s_job)
>
>                        struct amdgpu_job *job = to_amdgpu_job(s_job);
>                        struct amdgpu_task_info *ti;
>                        struct amdgpu_device *adev = ring->adev;
>                 -     int idx;
>                 +     int idx, xcp_id = !job->vm ? 0 : job->vm->xcp_id;
>                        int r;
>
>                        if (!drm_dev_enter(adev_to_drm(adev), &idx)) {
>                 @@ -62,7 +62,7 @@ static enum drm_gpu_sched_stat
>
>             amdgpu_job_timedout(struct drm_sched_job *s_job)
>
>                 job->base.sched->name,
>                 atomic_read(&ring->fence_drv.last_seq),
>                                ring->fence_drv.sync_seq);
>
>                 -     ti = amdgpu_vm_get_task_info_pasid(ring->adev,
>                 job->pasid);
>                 +     ti = amdgpu_vm_get_task_info_pasid(ring->adev,
>                 job->pasid, xcp_id);
>                        if (ti) {
>                                dev_err(adev->dev,
>                                        "Process information: process
>                 %s pid %d thread %s pid %d\n",
>                 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>
>             b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>
>                 index d9fde38f6ee2..e413bf4a3e84 100644
>                 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>                 +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>                 @@ -1275,17 +1275,20 @@ int amdgpu_info_ioctl(struct
>                 drm_device *dev,
>
>             void *data, struct drm_file *filp)
>
>                                struct amdgpu_vm *vm = &fpriv->vm;
>                                struct drm_amdgpu_info_gpuvm_fault
>                 gpuvm_fault;
>                                unsigned long flags;
>                 +             int i;
>
>                                if (!vm)
>                                        return -EINVAL;
>
>                                memset(&gpuvm_fault, 0,
>                 sizeof(gpuvm_fault));
>
>                 - xa_lock_irqsave(&adev->vm_manager.pasids, flags);
>                 +             for (i = 0; i < adev->xcp_mgr->num_xcps;
>                 i++)
>                 + xa_lock_irqsave(&adev->vm_manager.pasids[i], flags);
>                                gpuvm_fault.addr = vm->fault_info.addr;
>                                gpuvm_fault.status =
>                 vm->fault_info.status;
>                                gpuvm_fault.vmhub = vm->fault_info.vmhub;
>                 - xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
>                 +             for (i = 0; i < adev->xcp_mgr->num_xcps;
>                 i++)
>                 + xa_unlock_irqrestore(&adev->vm_manager.pasids[i],
>                 flags);
>
>                                return copy_to_user(out, &gpuvm_fault,
>                 min((size_t)size, sizeof(gpuvm_fault))) ? -EFAULT : 0;
>                 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>
>             b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>
>                 index bcb729094521..f43e1c15f423 100644
>                 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>                 +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>                 @@ -146,7 +146,7 @@ int amdgpu_vm_set_pasid(struct
>                 amdgpu_device
>
>             *adev, struct amdgpu_vm *vm,
>
>                                return 0;
>
>                        if (vm->pasid) {
>                 -             r =
>                 xa_err(xa_erase_irq(&adev->vm_manager.pasids,
>                 vm->pasid));
>                 +             r =
>                 xa_err(xa_erase_irq(&adev->vm_manager.pasids[vm->xcp_id],
>
>             vm->pasid));
>
>                                if (r < 0)
>                                        return r;
>
>                 @@ -154,7 +154,7 @@ int amdgpu_vm_set_pasid(struct
>                 amdgpu_device
>
>             *adev, struct amdgpu_vm *vm,
>
>                        }
>
>                        if (pasid) {
>                 -             r =
>                 xa_err(xa_store_irq(&adev->vm_manager.pasids, pasid, vm,
>                 +             r =
>                 xa_err(xa_store_irq(&adev->vm_manager.pasids[vm->xcp_id],
>
>             pasid, vm,
>
>                 GFP_KERNEL));
>                                if (r < 0)
>                                        return r;
>                 @@ -2288,14 +2288,14 @@ static void
>
>             amdgpu_vm_destroy_task_info(struct kref *kref)
>
>                    }
>
>                    static inline struct amdgpu_vm *
>                 -amdgpu_vm_get_vm_from_pasid(struct amdgpu_device
>                 *adev, u32 pasid)
>                 +amdgpu_vm_get_vm_from_pasid(struct amdgpu_device
>                 *adev, u32 pasid,
>
>             u32 xcp_id)
>
>                    {
>                        struct amdgpu_vm *vm;
>                        unsigned long flags;
>
>                 - xa_lock_irqsave(&adev->vm_manager.pasids, flags);
>                 -     vm = xa_load(&adev->vm_manager.pasids, pasid);
>                 - xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
>                 + xa_lock_irqsave(&adev->vm_manager.pasids[xcp_id],
>                 flags);
>                 +     vm = xa_load(&adev->vm_manager.pasids[xcp_id],
>                 pasid);
>                 +
>                 xa_unlock_irqrestore(&adev->vm_manager.pasids[xcp_id],
>                 flags);
>
>                        return vm;
>                    }
>                 @@ -2343,10 +2343,10 @@ amdgpu_vm_get_task_info_vm(struct
>
>             amdgpu_vm *vm)
>
>                     * referenced down with amdgpu_vm_put_task_info.
>                     */
>                    struct amdgpu_task_info *
>                 -amdgpu_vm_get_task_info_pasid(struct amdgpu_device
>                 *adev, u32 pasid)
>                 +amdgpu_vm_get_task_info_pasid(struct amdgpu_device
>                 *adev, u32 pasid,
>
>             u32 xcp_id)
>
>                    {
>                        return amdgpu_vm_get_task_info_vm(
>                 - amdgpu_vm_get_vm_from_pasid(adev, pasid));
>                 + amdgpu_vm_get_vm_from_pasid(adev, pasid, xcp_id));
>                    }
>
>                    static int amdgpu_vm_create_task_info(struct
>                 amdgpu_vm *vm)
>                 @@ -2481,6 +2481,8 @@ int amdgpu_vm_init(struct
>                 amdgpu_device
>
>             *adev, struct amdgpu_vm *vm,
>
>                 amdgpu_bo_unreserve(vm->root.bo);
>                        amdgpu_bo_unref(&root_bo);
>
>                 +     vm->xcp_id = xcp_id < 0 ? 0 : xcp_id;
>                 +
>                        return 0;
>
>                    error_free_root:
>                 @@ -2695,8 +2697,8 @@ void amdgpu_vm_manager_init(struct
>
>             amdgpu_device *adev)
>
>                    #else
>                        adev->vm_manager.vm_update_mode = 0;
>                    #endif
>                 -
>                 - xa_init_flags(&adev->vm_manager.pasids,
>                 XA_FLAGS_LOCK_IRQ);
>                 +     for (i = 0; i < MAX_XCP; i++)
>                 + xa_init_flags(&(adev->vm_manager.pasids[i]),
>
>             XA_FLAGS_LOCK_IRQ);
>
>                    }
>
>                    /**
>                 @@ -2708,10 +2710,15 @@ void
>                 amdgpu_vm_manager_init(struct
>
>             amdgpu_device *adev)
>
>                     */
>                    void amdgpu_vm_manager_fini(struct amdgpu_device
>                 *adev)
>                    {
>                 - WARN_ON(!xa_empty(&adev->vm_manager.pasids));
>                 - xa_destroy(&adev->vm_manager.pasids);
>                 +     int i;
>                 +
>                 +     for (i = 0; i < MAX_XCP; i++) {
>                 + WARN_ON(!xa_empty(&adev->vm_manager.pasids[i]));
>                 + xa_destroy(&adev->vm_manager.pasids[i]);
>                 +     }
>
>                        amdgpu_vmid_mgr_fini(adev);
>                 +
>                    }
>
>                    /**
>                 @@ -2778,17 +2785,18 @@ bool
>                 amdgpu_vm_handle_fault(struct
>
>             amdgpu_device *adev, u32 pasid,
>
>                        unsigned long irqflags;
>                        uint64_t value, flags;
>                        struct amdgpu_vm *vm;
>                 -     int r;
>                 +     int r, xcp_id;
>
>                 - xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
>                 -     vm = xa_load(&adev->vm_manager.pasids, pasid);
>                 +     xcp_id = amdgpu_amdkfd_node_id_to_xcc_id(adev,
>                 node_id)/adev->gfx.num_xcc_per_xcp;
>                 + xa_lock_irqsave(&adev->vm_manager.pasids[xcp_id],
>                 irqflags);
>                 +     vm = xa_load(&adev->vm_manager.pasids[xcp_id],
>                 pasid);
>                        if (vm) {
>                                root = amdgpu_bo_ref(vm->root.bo);
>                                is_compute_context =
>                 vm->is_compute_context;
>                        } else {
>                                root = NULL;
>                        }
>                 - xa_unlock_irqrestore(&adev->vm_manager.pasids,
>                 irqflags);
>                 +
>                 xa_unlock_irqrestore(&adev->vm_manager.pasids[xcp_id],
>                 irqflags);
>
>                        if (!root)
>                                return false;
>                 @@ -2806,11 +2814,11 @@ bool
>                 amdgpu_vm_handle_fault(struct
>
>             amdgpu_device *adev, u32 pasid,
>
>                                goto error_unref;
>
>                        /* Double check that the VM still exists */
>                 - xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
>                 -     vm = xa_load(&adev->vm_manager.pasids, pasid);
>                 + xa_lock_irqsave(&adev->vm_manager.pasids[xcp_id],
>                 irqflags);
>                 +     vm = xa_load(&adev->vm_manager.pasids[xcp_id],
>                 pasid);
>                        if (vm && vm->root.bo != root)
>                                vm = NULL;
>                 - xa_unlock_irqrestore(&adev->vm_manager.pasids,
>                 irqflags);
>                 +
>                 xa_unlock_irqrestore(&adev->vm_manager.pasids[xcp_id],
>                 irqflags);
>                        if (!vm)
>                                goto error_unlock;
>
>                 @@ -2968,14 +2976,15 @@ void
>                 amdgpu_vm_update_fault_cache(struct
>
>             amdgpu_device *adev,
>
>                 unsigned int pasid,
>                                                  uint64_t addr,
>                                                  uint32_t status,
>                 -                               unsigned int vmhub)
>                 +                               unsigned int vmhub,
>                 +                               uint32_t xcp_id)
>                    {
>                        struct amdgpu_vm *vm;
>                        unsigned long flags;
>
>                 - xa_lock_irqsave(&adev->vm_manager.pasids, flags);
>                 + xa_lock_irqsave(&adev->vm_manager.pasids[xcp_id],
>                 flags);
>
>                 -     vm = xa_load(&adev->vm_manager.pasids, pasid);
>                 +     vm = xa_load(&adev->vm_manager.pasids[xcp_id],
>                 pasid);
>                        /* Don't update the fault cache if status is
>                 0.  In the multiple
>                         * fault case, subsequent faults will return a
>                 0 status which is
>                         * useless for userspace and replaces the
>                 useful fault status, so
>                 @@ -3008,7 +3017,7 @@ void
>                 amdgpu_vm_update_fault_cache(struct
>
>             amdgpu_device *adev,
>
>                 WARN_ONCE(1, "Invalid vmhub %u\n", vmhub);
>                                }
>                        }
>                 - xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
>                 +
>                 xa_unlock_irqrestore(&adev->vm_manager.pasids[xcp_id],
>                 flags);
>                    }
>
>                    /**
>                 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>
>             b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>
>                 index 046949c4b695..1499f5f731e9 100644
>                 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>                 +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>                 @@ -35,6 +35,7 @@
>                    #include "amdgpu_sync.h"
>                    #include "amdgpu_ring.h"
>                    #include "amdgpu_ids.h"
>                 +#include "amdgpu_xcp.h"
>
>                    struct drm_exec;
>
>                 @@ -418,6 +419,9 @@ struct amdgpu_vm {
>
>                        /* cached fault info */
>                        struct amdgpu_vm_fault_info fault_info;
>                 +
>                 +     /* XCP ID */
>                 +     int xcp_id;
>                    };
>
>                    struct amdgpu_vm_manager {
>                 @@ -456,7 +460,7 @@ struct amdgpu_vm_manager {
>                        /* PASID to VM mapping, will be used in
>                 interrupt context to
>                         * look up VM of a page fault
>                         */
>                 -     struct xarray pasids;
>                 +     struct xarray pasids[MAX_XCP];
>                        /* Global registration of recent page fault
>                 information */
>                        struct amdgpu_vm_fault_info fault_info;
>                    };
>                 @@ -550,7 +554,7 @@ bool
>                 amdgpu_vm_need_pipeline_sync(struct
>
>             amdgpu_ring *ring,
>
>                    void amdgpu_vm_check_compute_bug(struct
>                 amdgpu_device *adev);
>
>                    struct amdgpu_task_info *
>                 -amdgpu_vm_get_task_info_pasid(struct amdgpu_device
>                 *adev, u32 pasid);
>                 +amdgpu_vm_get_task_info_pasid(struct amdgpu_device
>                 *adev, u32 pasid,
>
>             u32 xcp_id);
>
>                    struct amdgpu_task_info *
>                    amdgpu_vm_get_task_info_vm(struct amdgpu_vm *vm);
>                 @@ -649,7 +653,8 @@ void
>                 amdgpu_vm_update_fault_cache(struct
>
>             amdgpu_device *adev,
>
>                 unsigned int pasid,
>                                                  uint64_t addr,
>                                                  uint32_t status,
>                 -                               unsigned int vmhub);
>                 +                               unsigned int vmhub,
>                 +                               uint32_t xcp_id);
>                    void amdgpu_vm_tlb_fence_create(struct
>                 amdgpu_device *adev,
>                                                 struct amdgpu_vm *vm,
>                                                 struct dma_fence
>                 **fence);
>                 diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>
>             b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>
>                 index f0ceab3ce5bf..24b042febf5c 100644
>                 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>                 +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>                 @@ -151,7 +151,8 @@ static int
>                 gmc_v10_0_process_interrupt(struct
>
>             amdgpu_device *adev,
>
>                 WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
>
>                 amdgpu_vm_update_fault_cache(adev, entry->pasid, addr,
>                 status,
>                 - entry->vmid_src ? AMDGPU_MMHUB0(0) :
>
>             AMDGPU_GFXHUB(0));
>
>                 + entry->vmid_src ? AMDGPU_MMHUB0(0) :
>
>             AMDGPU_GFXHUB(0),
>
>                 + 0);
>                        }
>
>                        if (!printk_ratelimit())
>                 @@ -161,7 +162,7 @@ static int
>                 gmc_v10_0_process_interrupt(struct
>
>             amdgpu_device *adev,
>
>                                "[%s] page fault (src_id:%u ring:%u
>                 vmid:%u pasid:%u)\n",
>                                entry->vmid_src ? "mmhub" : "gfxhub",
>                                entry->src_id, entry->ring_id,
>                 entry->vmid, entry->pasid);
>                 -     task_info = amdgpu_vm_get_task_info_pasid(adev,
>                 entry->pasid);
>                 +     task_info = amdgpu_vm_get_task_info_pasid(adev,
>                 entry->pasid, 0);
>                        if (task_info) {
>                                dev_err(adev->dev,
>                                        " in process %s pid %d thread
>                 %s pid %d\n",
>                 diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
>
>             b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
>
>                 index 2797fd84432b..3507046d33e6 100644
>                 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
>                 +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
>                 @@ -122,7 +122,8 @@ static int
>                 gmc_v11_0_process_interrupt(struct
>
>             amdgpu_device *adev,
>
>                 WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
>
>                 amdgpu_vm_update_fault_cache(adev, entry->pasid, addr,
>                 status,
>                 - entry->vmid_src ? AMDGPU_MMHUB0(0) :
>
>             AMDGPU_GFXHUB(0));
>
>                 + entry->vmid_src ? AMDGPU_MMHUB0(0) :
>
>             AMDGPU_GFXHUB(0),
>
>                 + 0);
>                        }
>
>                        if (printk_ratelimit()) {
>                 @@ -132,7 +133,7 @@ static int
>                 gmc_v11_0_process_interrupt(struct
>
>             amdgpu_device *adev,
>
>                 "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n",
>                                        entry->vmid_src ? "mmhub" :
>                 "gfxhub",
>                                        entry->src_id, entry->ring_id,
>                 entry->vmid, entry->pasid);
>                 -             task_info =
>                 amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
>                 +             task_info =
>                 amdgpu_vm_get_task_info_pasid(adev, entry->pasid, 0);
>                                if (task_info) {
>                                        dev_err(adev->dev,
>                                                " in process %s pid %d
>                 thread %s pid %d)\n",
>                 diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
>
>             b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
>
>                 index 60acf676000b..9844564c6c74 100644
>                 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
>                 +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
>                 @@ -115,7 +115,8 @@ static int
>                 gmc_v12_0_process_interrupt(struct
>
>             amdgpu_device *adev,
>
>                 WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
>
>                 amdgpu_vm_update_fault_cache(adev, entry->pasid, addr,
>                 status,
>                 - entry->vmid_src ? AMDGPU_MMHUB0(0) :
>
>             AMDGPU_GFXHUB(0));
>
>                 + entry->vmid_src ? AMDGPU_MMHUB0(0) :
>
>             AMDGPU_GFXHUB(0),
>
>                 + 0);
>                        }
>
>                        if (printk_ratelimit()) {
>                 @@ -125,7 +126,7 @@ static int
>                 gmc_v12_0_process_interrupt(struct
>
>             amdgpu_device *adev,
>
>                 "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n",
>                                        entry->vmid_src ? "mmhub" :
>                 "gfxhub",
>                                        entry->src_id, entry->ring_id,
>                 entry->vmid, entry->pasid);
>                 -             task_info =
>                 amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
>                 +             task_info =
>                 amdgpu_vm_get_task_info_pasid(adev, entry->pasid, 0);
>                                if (task_info) {
>                                        dev_err(adev->dev,
>                                                " in process %s pid %d
>                 thread %s pid %d)\n",
>                 diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>
>             b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>
>                 index 994432fb57ea..2cdb0cbb7c4d 100644
>                 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>                 +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>                 @@ -1268,7 +1268,8 @@ static int
>                 gmc_v7_0_process_interrupt(struct
>
>             amdgpu_device *adev,
>
>                                return 0;
>
>                        amdgpu_vm_update_fault_cache(adev, entry->pasid,
>                 -                                  ((u64)addr) <<
>                 AMDGPU_GPU_PAGE_SHIFT, status,
>
>             AMDGPU_GFXHUB(0));
>
>                 + ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT, status,
>                 + AMDGPU_GFXHUB(0), 0);
>
>                        if (amdgpu_vm_fault_stop ==
>                 AMDGPU_VM_FAULT_STOP_FIRST)
>                 gmc_v7_0_set_fault_enable_default(adev, false);
>                 diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>
>             b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>
>                 index 86488c052f82..6855caeb7f74 100644
>                 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>                 +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>                 @@ -1437,7 +1437,8 @@ static int
>                 gmc_v8_0_process_interrupt(struct
>
>             amdgpu_device *adev,
>
>                                return 0;
>
>                        amdgpu_vm_update_fault_cache(adev, entry->pasid,
>                 -                                  ((u64)addr) <<
>                 AMDGPU_GPU_PAGE_SHIFT, status,
>
>             AMDGPU_GFXHUB(0));
>
>                 + ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT, status,
>                 + AMDGPU_GFXHUB(0), 0);
>
>                        if (amdgpu_vm_fault_stop ==
>                 AMDGPU_VM_FAULT_STOP_FIRST)
>                 gmc_v8_0_set_fault_enable_default(adev, false);
>                 @@ -1448,7 +1449,7 @@ static int
>                 gmc_v8_0_process_interrupt(struct
>
>             amdgpu_device *adev,
>
>                 dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n",
>                                        entry->src_id,
>                 entry->src_data[0]);
>
>                 -             task_info =
>                 amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
>                 +             task_info =
>                 amdgpu_vm_get_task_info_pasid(adev, entry->pasid, 0);
>                                if (task_info) {
>                                        dev_err(adev->dev, " for
>                 process %s pid %d thread %s pid %d\n",
>                 task_info->process_name, task_info->tgid,
>                 diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>
>             b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>
>                 index b73136d390cc..e183e08b2c02 100644
>                 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>                 +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>                 @@ -556,10 +556,12 @@ static int
>                 gmc_v9_0_process_interrupt(struct
>
>             amdgpu_device *adev,
>
>                        unsigned int vmhub;
>                        u64 addr;
>                        uint32_t cam_index = 0;
>                 -     int ret, xcc_id = 0;
>                 -     uint32_t node_id;
>                 +     int ret;
>                 +     uint32_t node_id, xcc_id, xcp_id;
>
>                        node_id = entry->node_id;
>                 +     xcc_id = amdgpu_amdkfd_node_id_to_xcc_id(adev,
>                 node_id);
>                 +     xcp_id = xcc_id/adev->gfx.num_xcc_per_xcp;
>
>                        addr = (u64)entry->src_data[0] << 12;
>                        addr |= ((u64)entry->src_data[1] & 0xf) << 44;
>                 @@ -572,12 +574,6 @@ static int
>                 gmc_v9_0_process_interrupt(struct
>
>             amdgpu_device *adev,
>
>                                vmhub = AMDGPU_MMHUB1(0);
>                        } else {
>                                hub_name = "gfxhub0";
>                 -             if
>                 (adev->gfx.funcs->ih_node_to_logical_xcc) {
>                 -                     xcc_id =
>                 adev->gfx.funcs->ih_node_to_logical_xcc(adev,
>                 -                             node_id);
>                 -                     if (xcc_id < 0)
>                 -                             xcc_id = 0;
>                 -             }
>                                vmhub = xcc_id;
>                        }
>                        hub = &adev->vmhub[vmhub];
>                 @@ -631,7 +627,7 @@ static int
>                 gmc_v9_0_process_interrupt(struct
>
>             amdgpu_device *adev,
>
>                                retry_fault ? "retry" : "no-retry",
>                                entry->src_id, entry->ring_id,
>                 entry->vmid, entry->pasid);
>
>                 -     task_info = amdgpu_vm_get_task_info_pasid(adev,
>                 entry->pasid);
>                 +     task_info = amdgpu_vm_get_task_info_pasid(adev,
>                 entry->pasid,
>
>             xcp_id);
>
>                        if (task_info) {
>                                dev_err(adev->dev,
>                                        " for process %s pid %d thread
>                 %s pid %d)\n",
>                 @@ -675,7 +671,7 @@ static int
>                 gmc_v9_0_process_interrupt(struct
>
>             amdgpu_device *adev,
>
>                        if (!amdgpu_sriov_vf(adev))
>                 WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
>
>                 -     amdgpu_vm_update_fault_cache(adev, entry->pasid,
>                 addr, status,
>
>             vmhub);
>
>                 + amdgpu_vm_update_fault_cache(adev, entry->pasid,
>                 addr, status,
>
>             vmhub, xcp_id);
>
>                 dev_err(adev->dev,
>                 "VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
>                 diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>
>             b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>
>                 index 23ef4eb36b40..1ac4224bbe5b 100644
>                 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>                 +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>                 @@ -2182,7 +2182,7 @@ static int
>                 sdma_v4_0_print_iv_entry(struct
>
>             amdgpu_device *adev,
>
>                 instance, addr, entry->src_id, entry->ring_id,
>                 entry->vmid,
>                                           entry->pasid);
>
>                 -     task_info = amdgpu_vm_get_task_info_pasid(adev,
>                 entry->pasid);
>                 +     task_info = amdgpu_vm_get_task_info_pasid(adev,
>                 entry->pasid, 0);
>                        if (task_info) {
>                 dev_dbg_ratelimited(adev->dev,
>                                                    " for process %s
>                 pid %d thread %s pid %d\n",
>                 diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
>
>             b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
>
>                 index 57f16c09abfc..c8b5c0302ca7 100644
>                 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
>                 +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
>                 @@ -1683,6 +1683,8 @@ static int
>                 sdma_v4_4_2_print_iv_entry(struct
>
>             amdgpu_device *adev,
>
>                        int instance;
>                        struct amdgpu_task_info *task_info;
>                        u64 addr;
>                 +     uint32_t xcc_id =
>                 amdgpu_amdkfd_node_id_to_xcc_id(adev, entry->node_id);
>                 +     uint32_t xcp_id = xcc_id/adev->gfx.num_xcc_per_xcp;
>
>                        instance = sdma_v4_4_2_irq_id_to_seq(adev,
>                 entry->client_id);
>                        if (instance < 0 || instance >=
>                 adev->sdma.num_instances) {
>                 @@ -1698,7 +1700,7 @@ static int
>                 sdma_v4_4_2_print_iv_entry(struct
>
>             amdgpu_device *adev,
>
>                 instance, addr, entry->src_id, entry->ring_id,
>                 entry->vmid,
>                                            entry->pasid);
>
>                 -     task_info = amdgpu_vm_get_task_info_pasid(adev,
>                 entry->pasid);
>                 +     task_info = amdgpu_vm_get_task_info_pasid(adev,
>                 entry->pasid,
>
>             xcp_id);
>
>                        if (task_info) {
>                 dev_dbg_ratelimited(adev->dev, " for process %s pid %d
>                 thread %s
>
>             pid %d\n",
>
>                 task_info->process_name, task_info->tgid,
>                 diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
>
>             b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
>
>                 index ea3792249209..c098fbaf0e1c 100644
>                 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
>                 +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
>                 @@ -1262,8 +1262,9 @@ void
>                 kfd_signal_reset_event(struct kfd_node
>
>             *dev)
>
>                                if (dev->dqm->detect_hang_count) {
>                                        struct amdgpu_task_info *ti;
>                 +                     uint32_t xcp_id = dev->xcp ?
>                 dev->xcp->id : 0;
>
>                 -                     ti =
>                 amdgpu_vm_get_task_info_pasid(dev->adev, p->pasid);
>                 +                     ti =
>                 amdgpu_vm_get_task_info_pasid(dev->adev, p->pasid,
>
>             xcp_id);
>
>                                        if (ti) {
>                 dev_err(dev->adev->dev,
>                                                        "Queues reset
>                 on process %s tid %d thread %s pid %d\n",
>                 diff --git
>                 a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
>
>             b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
>
>                 index 8e0d0356e810..d7cbf9525698 100644
>                 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
>                 +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
>                 @@ -377,12 +377,8 @@ static void
>                 event_interrupt_wq_v10(struct
>
>             kfd_node *dev,
>
>                                struct kfd_hsa_memory_exception_data
>                 exception_data;
>
>                                /* gfxhub */
>                 -             if (!vmid_type &&
>                 dev->adev->gfx.funcs->ih_node_to_logical_xcc) {
>                 -                     hub_inst =
>                 dev->adev->gfx.funcs->ih_node_to_logical_xcc(dev->adev,
>                 -                             node_id);
>                 -                     if (hub_inst < 0)
>                 -                             hub_inst = 0;
>                 -             }
>                 +             if (!vmid_type)
>                 +                     hub_inst =
>                 amdgpu_amdkfd_node_id_to_xcc_id(dev->adev,
>
>             node_id);
>
>                                /* mmhub */
>                                if (vmid_type && client_id ==
>                 SOC15_IH_CLIENTID_VMC)
>                 diff --git
>                 a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
>
>             b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
>
>                 index a9c3580be8c9..4708b8c811a5 100644
>                 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
>                 +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
>                 @@ -437,12 +437,8 @@ static void
>                 event_interrupt_wq_v9(struct
>
>             kfd_node *dev,
>
>                                struct kfd_hsa_memory_exception_data
>                 exception_data;
>
>                                /* gfxhub */
>                 -             if (!vmid_type &&
>                 dev->adev->gfx.funcs->ih_node_to_logical_xcc) {
>                 -                     hub_inst =
>                 dev->adev->gfx.funcs->ih_node_to_logical_xcc(dev->adev,
>                 -                             node_id);
>                 -                     if (hub_inst < 0)
>                 -                             hub_inst = 0;
>                 -             }
>                 +             if (!vmid_type)
>                 +                     hub_inst =
>                 amdgpu_amdkfd_node_id_to_xcc_id(dev->adev,
>
>             node_id);
>
>                                /* mmhub */
>                                if (vmid_type && client_id ==
>                 SOC15_IH_CLIENTID_VMC)
>                 diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
>
>             b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
>
>                 index ea6a8e43bd5b..b5f2f5b1069c 100644
>                 --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
>                 +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
>                 @@ -251,8 +251,9 @@ void
>
>             kfd_smi_event_update_thermal_throttling(struct kfd_node *dev,
>
>                    void kfd_smi_event_update_vmfault(struct kfd_node
>                 *dev, uint16_t pasid)
>                    {
>                        struct amdgpu_task_info *task_info;
>                 +     uint32_t xcp_id = dev->xcp ? dev->xcp->id : 0;
>
>                 -     task_info =
>                 amdgpu_vm_get_task_info_pasid(dev->adev, pasid);
>                 +     task_info =
>                 amdgpu_vm_get_task_info_pasid(dev->adev, pasid, xcp_id);
>                        if (task_info) {
>                                /* Report VM faults from user
>                 applications, not retry from kernel */
>                                if (task_info->pid)
>

[-- Attachment #2: Type: text/html, Size: 120412 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] drm/amdkfd: fix vm-pasid lookup for multiple partitions
  2024-09-10 23:59           ` Chen, Xiaogang
@ 2024-09-11  6:54             ` Christian König
  2024-09-11 13:40               ` Philip Yang
  0 siblings, 1 reply; 11+ messages in thread
From: Christian König @ 2024-09-11  6:54 UTC (permalink / raw)
  To: Chen, Xiaogang, Kim, Jonathan, Yang, Philip,
	amd-gfx@lists.freedesktop.org
  Cc: Kuehling, Felix, Deucher, Alexander, Joshi, Mukul

[-- Attachment #1: Type: text/plain, Size: 50102 bytes --]

Yeah, I completely agree with Xiaogang.

The PASID is an identifier of an address space. And the idea of the KFD 
was that we can just use the same address space and with it the page 
tables for multiple execution devices, e.g. CPUs, GPUs etc...

That idea turned out to be a bad one because it clashes with some use 
cases (e.g. native context virtualization). The better approach is to 
see the CPU and GPU processes as separate things which just share the 
same underlying data.

Opening the KFD node multiple times currently results in the same KFD 
process being used. We should probably consider changing that.

Regards,
Christian.

Am 11.09.24 um 01:59 schrieb Chen, Xiaogang:
>
>
> You want to have a 1:1 mapping between vm and pasid so that you can 
> query the vm from the pasid.  I think there is a basic existing issue: 
> we cannot have a 1:1 correspondence between vm and pasid.
>
> PASIDs are global address space identifiers that can be shared between 
> the GPU, an IOMMU and the driver. One app should have one pasid that 
> the iommu uses to decide which page table to use when a device accesses 
> system resources. But one app can open a render/kfd node multiple times, 
> even for one gpu. That means one app could have multiple GPU vms.
>
> I think we did not have this issue because an app usually opens a 
> render node or kfd node only once. When one adev has multiple 
> partitions there are multiple vms on one adev, so we hit this issue.  
> But the root cause is not multiple partitions, and the solution is not 
> to introduce multiple pasids. I think we should have one pasid for one 
> app and use a different way to get the vm from the pasid.
>
>
> Regards
>
> Xiaogang
>
> On 9/10/2024 3:47 PM, Kim, Jonathan wrote:
>>
>> [Public]
>>
>>
>>
>> 	
>> Caution: This message originated from an External Source. Use proper 
>> caution when opening attachments, clicking links, or responding.
>>
>>
>> [Public]
>>
>>
>> KMS open still sets pasid-vm bindings per adev (socket), so I don’t 
>> see how the per-partition PASID overwrite issue is primarily a KFD 
>> concern.
>>
>> Are you saying the KFD process device holds a shadow copy of the 
>> correct VM during page restore on a fault?
>>
>> Doesn’t it acquire the wrong VM object on process init in the first 
>> place?
>>
>> Even if it were the case the KFD had a separate VM reference, the 
>> underlying IRQ fault handling is still broken.
>>
>> We probably don’t want to bandage over something to fix one symptom.
>>
>> Jon
>>
>> *From:*Yang, Philip <Philip.Yang@amd.com>
>> *Sent:* Tuesday, September 10, 2024 11:24 AM
>> *To:* Koenig, Christian <Christian.Koenig@amd.com>; Kim, Jonathan 
>> <Jonathan.Kim@amd.com>; amd-gfx@lists.freedesktop.org
>> *Cc:* Kuehling, Felix <Felix.Kuehling@amd.com>; Deucher, Alexander 
>> <Alexander.Deucher@amd.com>; Joshi, Mukul <Mukul.Joshi@amd.com>
>> *Subject:* Re: [PATCH] drm/amdkfd: fix vm-pasid lookup for multiple 
>> partitions
>>
>> On 2024-09-09 14:46, Christian König wrote:
>>
>>     Am 09.09.24 um 18:02 schrieb Kim, Jonathan:
>>
>>         [Public]
>>
>>
>>             -----Original Message-----
>>             From: Christian König <ckoenig.leichtzumerken@gmail.com>
>>             <mailto:ckoenig.leichtzumerken@gmail.com>
>>             Sent: Thursday, September 5, 2024 10:24 AM
>>             To: Kim, Jonathan <Jonathan.Kim@amd.com>
>>             <mailto:Jonathan.Kim@amd.com>; amd-gfx@lists.freedesktop.org
>>             Cc: Kuehling, Felix <Felix.Kuehling@amd.com>
>>             <mailto:Felix.Kuehling@amd.com>; Deucher, Alexander
>>             <Alexander.Deucher@amd.com>
>>             <mailto:Alexander.Deucher@amd.com>; Joshi, Mukul
>>             <Mukul.Joshi@amd.com> <mailto:Mukul.Joshi@amd.com>
>>             Subject: Re: [PATCH] drm/amdkfd: fix vm-pasid lookup for
>>             multiple partitions
>>
>>             Caution: This message originated from an External Source.
>>             Use proper caution
>>             when opening attachments, clicking links, or responding.
>>
>>
>>             Am 19.08.24 um 19:59 schrieb Jonathan Kim:
>>
>>                 Currently multiple partitions will incorrectly
>>                 overwrite the VM lookup
>>                 table since the table is indexed by PASID and
>>                 multiple partitions can
>>                 register different VM objects on the same PASID.
>>
>>             That's a rather bad idea. Why do we have the same PASID
>>             for different VM
>>             objects in the first place?
>>
>>         Alex can probably elaborate on the KGD side, but from what I
>>         can see, the KMS driver open call has always assigned a new
>>         VM object per PASID on an open call.
>>         The KFD acquires and replaces the KGD PASID-VMID registration
>>         on its own compute process open/creation call.
>>         If this is the bad_idea you're referring to, then someone
>>         else will have to chime in.  I don't have much history on
>>         this unfortunately.
>>
>>
>>     Yeah, Felix and I designed that.
>>
>> The app opens the drm node to create a vm for each partition, with a 
>> different vm->pasid for each vm. The issue comes from 
>> kfd_ioctl_acquire_vm -> kfd_process_device_init_vm -> 
>> amdgpu_amdkfd_gpuvm_set_vm_pasid, which replaces every vm->pasid with 
>> the kfd process->pasid obtained from opening the kfd node. This ends up 
>> storing only one vm in adev->vm_manager.pasids under the KFD process 
>> pasid, so we cannot retrieve the correct vm from 
>> adev->vm_manager.pasids on mGPUs or multiple partitions.
>>
>>
>>
>>         That aside, the current problem is that all KFD device
>>         structures are logical partitions and register their PASID-VM
>>         binding using this concept of a device.
>>
>>
>>     As far as I can see that is the fundamental problem. This needs
>>     to be fixed instead.
>>
>>
>>         On the KGD side however, the registration table is maintained
>>         in the adev struct, which is a physical socket.
>>         So there's a mismatch in understanding of what a device is
>>         between the KFD & KGD with regard to the look up table that
>>         results in bad bindings.
>>
>>         Adding a per-partition dimension to the existing lookup table
>>         resolves issues we're seeing, for example, with memory
>>         violation interception and XNACK, i.e. bad bindings result in
>>         the wrong vm object being found when setting no-retry flags on
>>         memory violations.
>>
>> svm_range_restore_pages retry fault recovery uses the fault pasid to 
>> get the kfd process, and uses the fault node_id to get pdd->vm; maybe 
>> you can use the same approach to fix the debugger issue.
>>
>> Regards,
>>
>> Philip
>>
>>
>>     Yeah that is pretty much a no-go.
>>
>>     The PASID and how it is used is defined by the PCIe
>>     specifications. If we now start to assign multiple VMs to the
>>     same PASID then we are violating the PCIe specification.
>>
>>     The problems you see are most likely just the tip of the iceberg
>>     here.
>>
>>     Regards,
>>     Christian.
>>
>>
>>
>>         Jon
>>
>>
>>             Regards,
>>             Christian.
>>
>>
>>                 This results in loading the wrong VM object on PASID
>>                 query.
>>
>>                 To correct this, setup the lookup table to be
>>                 per-partition-per-PASID
>>                 instead.
>>
>>                 Signed-off-by: Jonathan Kim <jonathan.kim@amd.com>
>>                 <mailto:jonathan.kim@amd.com>
>>                 ---
>>                 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 12 ++++
>>                 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |  1 +
>>                 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c |  4 +-
>>                 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |  7 ++-
>>                 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 55
>>                 +++++++++++------
>>
>>             -- 
>>
>>                 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 11 +++-
>>                 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c |  5 +-
>>                 drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c |  5 +-
>>                 drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c |  5 +-
>>                 drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c |  3 +-
>>                 drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c |  5 +-
>>                 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 16 ++----
>>                 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c |  2 +-
>>                 drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c |  4 +-
>>                 drivers/gpu/drm/amd/amdkfd/kfd_events.c |  3 +-
>>                 .../gpu/drm/amd/amdkfd/kfd_int_process_v10.c |  8 +--
>>                 .../gpu/drm/amd/amdkfd/kfd_int_process_v9.c |  8 +--
>>                 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c |  3 +-
>>                    18 files changed, 92 insertions(+), 65 deletions(-)
>>
>>                 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>
>>             b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>
>>                 index c272461d70a9..28db789610e1 100644
>>                 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>                 +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>                 @@ -887,3 +887,15 @@ int amdgpu_amdkfd_unmap_hiq(struct
>>
>>             amdgpu_device *adev, u32 doorbell_off,
>>
>>                        return r;
>>                    }
>>                 +
>>                 +int amdgpu_amdkfd_node_id_to_xcc_id(struct
>>                 amdgpu_device *adev,
>>
>>             uint32_t node_id)
>>
>>                 +{
>>                 +     if (adev->gfx.funcs->ih_node_to_logical_xcc) {
>>                 +             int xcc_id =
>>                 adev->gfx.funcs->ih_node_to_logical_xcc(adev, node_id);
>>                 +
>>                 +             if (xcc_id >= 0)
>>                 +                     return xcc_id;
>>                 +     }
>>                 +
>>                 +     return 0;
>>                 +}
>>                 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>
>>             b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>
>>                 index 4ed49265c764..bf8bb45d8ab6 100644
>>                 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>                 +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>                 @@ -356,6 +356,7 @@ void
>>                 amdgpu_amdkfd_unreserve_mem_limit(struct
>>
>>             amdgpu_device *adev,
>>
>>                                uint64_t size, u32 alloc_flag, int8_t
>>                 xcp_id);
>>
>>                    u64 amdgpu_amdkfd_xcp_memory_size(struct
>>                 amdgpu_device *adev, int
>>
>>             xcp_id);
>>
>>                 +int amdgpu_amdkfd_node_id_to_xcc_id(struct
>>                 amdgpu_device *adev,
>>
>>             uint32_t node_id);
>>
>>                    #define KFD_XCP_MEM_ID(adev, xcp_id) \
>>                                ((adev)->xcp_mgr && (xcp_id) >= 0 ?\
>>                 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>>
>>             b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>>
>>                 index c6a1783fc9ef..bf9f8802e18d 100644
>>                 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>>                 +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>>                 @@ -37,7 +37,7 @@ static enum drm_gpu_sched_stat
>>
>>             amdgpu_job_timedout(struct drm_sched_job *s_job)
>>
>>                        struct amdgpu_job *job = to_amdgpu_job(s_job);
>>                        struct amdgpu_task_info *ti;
>>                        struct amdgpu_device *adev = ring->adev;
>>                 -     int idx;
>>                 +     int idx, xcp_id = !job->vm ? 0 : job->vm->xcp_id;
>>                        int r;
>>
>>                        if (!drm_dev_enter(adev_to_drm(adev), &idx)) {
>>                 @@ -62,7 +62,7 @@ static enum drm_gpu_sched_stat
>>
>>             amdgpu_job_timedout(struct drm_sched_job *s_job)
>>
>>                 job->base.sched->name,
>>                 atomic_read(&ring->fence_drv.last_seq),
>>                                ring->fence_drv.sync_seq);
>>
>>                 -     ti = amdgpu_vm_get_task_info_pasid(ring->adev,
>>                 job->pasid);
>>                 +     ti = amdgpu_vm_get_task_info_pasid(ring->adev,
>>                 job->pasid, xcp_id);
>>                        if (ti) {
>>                                dev_err(adev->dev,
>>                                        "Process information: process
>>                 %s pid %d thread %s pid %d\n",
>>                 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>
>>             b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>
>>                 index d9fde38f6ee2..e413bf4a3e84 100644
>>                 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>                 +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>                 @@ -1275,17 +1275,20 @@ int amdgpu_info_ioctl(struct
>>                 drm_device *dev,
>>
>>             void *data, struct drm_file *filp)
>>
>>                                struct amdgpu_vm *vm = &fpriv->vm;
>>                                struct drm_amdgpu_info_gpuvm_fault
>>                 gpuvm_fault;
>>                                unsigned long flags;
>>                 +             int i;
>>
>>                                if (!vm)
>>                                        return -EINVAL;
>>
>>                                memset(&gpuvm_fault, 0,
>>                 sizeof(gpuvm_fault));
>>
>>                 - xa_lock_irqsave(&adev->vm_manager.pasids, flags);
>>                 +             for (i = 0; i <
>>                 adev->xcp_mgr->num_xcps; i++)
>>                 + xa_lock_irqsave(&adev->vm_manager.pasids[i], flags);
>>                                gpuvm_fault.addr = vm->fault_info.addr;
>>                                gpuvm_fault.status =
>>                 vm->fault_info.status;
>>                                gpuvm_fault.vmhub = vm->fault_info.vmhub;
>>                 - xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
>>                 +             for (i = 0; i <
>>                 adev->xcp_mgr->num_xcps; i++)
>>                 + xa_unlock_irqrestore(&adev->vm_manager.pasids[i],
>>                 flags);
>>
>>                                return copy_to_user(out, &gpuvm_fault,
>>                 min((size_t)size, sizeof(gpuvm_fault))) ? -EFAULT : 0;
>>                 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>
>>             b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>
>>                 index bcb729094521..f43e1c15f423 100644
>>                 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>                 +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>                 @@ -146,7 +146,7 @@ int amdgpu_vm_set_pasid(struct
>>                 amdgpu_device
>>
>>             *adev, struct amdgpu_vm *vm,
>>
>>                                return 0;
>>
>>                        if (vm->pasid) {
>>                 -             r =
>>                 xa_err(xa_erase_irq(&adev->vm_manager.pasids,
>>                 vm->pasid));
>>                 +             r =
>>                 xa_err(xa_erase_irq(&adev->vm_manager.pasids[vm->xcp_id],
>>
>>
>>             vm->pasid));
>>
>>                                if (r < 0)
>>                                        return r;
>>
>>                 @@ -154,7 +154,7 @@ int amdgpu_vm_set_pasid(struct
>>                 amdgpu_device
>>
>>             *adev, struct amdgpu_vm *vm,
>>
>>                        }
>>
>>                        if (pasid) {
>>                 -             r =
>>                 xa_err(xa_store_irq(&adev->vm_manager.pasids, pasid, vm,
>>                 +             r =
>>                 xa_err(xa_store_irq(&adev->vm_manager.pasids[vm->xcp_id],
>>
>>
>>             pasid, vm,
>>
>>                 GFP_KERNEL));
>>                                if (r < 0)
>>                                        return r;
>>                 @@ -2288,14 +2288,14 @@ static void
>>
>>             amdgpu_vm_destroy_task_info(struct kref *kref)
>>
>>                    }
>>
>>                    static inline struct amdgpu_vm *
>>                 -amdgpu_vm_get_vm_from_pasid(struct amdgpu_device
>>                 *adev, u32 pasid)
>>                 +amdgpu_vm_get_vm_from_pasid(struct amdgpu_device
>>                 *adev, u32 pasid,
>>
>>             u32 xcp_id)
>>
>>                    {
>>                        struct amdgpu_vm *vm;
>>                        unsigned long flags;
>>
>>                 - xa_lock_irqsave(&adev->vm_manager.pasids, flags);
>>                 -     vm = xa_load(&adev->vm_manager.pasids, pasid);
>>                 - xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
>>                 + xa_lock_irqsave(&adev->vm_manager.pasids[xcp_id],
>>                 flags);
>>                 +     vm = xa_load(&adev->vm_manager.pasids[xcp_id],
>>                 pasid);
>>                 +
>>                 xa_unlock_irqrestore(&adev->vm_manager.pasids[xcp_id],
>>                 flags);
>>
>>                        return vm;
>>                    }
>>                 @@ -2343,10 +2343,10 @@
>>                 amdgpu_vm_get_task_info_vm(struct
>>
>>             amdgpu_vm *vm)
>>
>>                     * referenced down with amdgpu_vm_put_task_info.
>>                     */
>>                    struct amdgpu_task_info *
>>                 -amdgpu_vm_get_task_info_pasid(struct amdgpu_device
>>                 *adev, u32 pasid)
>>                 +amdgpu_vm_get_task_info_pasid(struct amdgpu_device
>>                 *adev, u32 pasid,
>>
>>             u32 xcp_id)
>>
>>                    {
>>                        return amdgpu_vm_get_task_info_vm(
>>                 - amdgpu_vm_get_vm_from_pasid(adev, pasid));
>>                 + amdgpu_vm_get_vm_from_pasid(adev, pasid, xcp_id));
>>                    }
>>
>>                    static int amdgpu_vm_create_task_info(struct
>>                 amdgpu_vm *vm)
>>                 @@ -2481,6 +2481,8 @@ int amdgpu_vm_init(struct
>>                 amdgpu_device
>>
>>             *adev, struct amdgpu_vm *vm,
>>
>>                 amdgpu_bo_unreserve(vm->root.bo);
>>                        amdgpu_bo_unref(&root_bo);
>>
>>                 +     vm->xcp_id = xcp_id < 0 ? 0 : xcp_id;
>>                 +
>>                        return 0;
>>
>>                    error_free_root:
>>                 @@ -2695,8 +2697,8 @@ void amdgpu_vm_manager_init(struct
>>
>>             amdgpu_device *adev)
>>
>>                    #else
>>                        adev->vm_manager.vm_update_mode = 0;
>>                    #endif
>>                 -
>>                 - xa_init_flags(&adev->vm_manager.pasids,
>>                 XA_FLAGS_LOCK_IRQ);
>>                 +     for (i = 0; i < MAX_XCP; i++)
>>                 + xa_init_flags(&(adev->vm_manager.pasids[i]),
>>
>>             XA_FLAGS_LOCK_IRQ);
>>
>>                    }
>>
>>                    /**
>>                 @@ -2708,10 +2710,15 @@ void
>>                 amdgpu_vm_manager_init(struct
>>
>>             amdgpu_device *adev)
>>
>>                     */
>>                    void amdgpu_vm_manager_fini(struct amdgpu_device
>>                 *adev)
>>                    {
>>                 - WARN_ON(!xa_empty(&adev->vm_manager.pasids));
>>                 - xa_destroy(&adev->vm_manager.pasids);
>>                 +     int i;
>>                 +
>>                 +     for (i = 0; i < MAX_XCP; i++) {
>>                 + WARN_ON(!xa_empty(&adev->vm_manager.pasids[i]));
>>                 + xa_destroy(&adev->vm_manager.pasids[i]);
>>                 +     }
>>
>>                        amdgpu_vmid_mgr_fini(adev);
>>                 +
>>                    }
>>
>>                    /**
>>                 @@ -2778,17 +2785,18 @@ bool
>>                 amdgpu_vm_handle_fault(struct
>>
>>             amdgpu_device *adev, u32 pasid,
>>
>>                        unsigned long irqflags;
>>                        uint64_t value, flags;
>>                        struct amdgpu_vm *vm;
>>                 -     int r;
>>                 +     int r, xcp_id;
>>
>>                 - xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
>>                 -     vm = xa_load(&adev->vm_manager.pasids, pasid);
>>                 +     xcp_id = amdgpu_amdkfd_node_id_to_xcc_id(adev,
>>                 node_id)/adev->gfx.num_xcc_per_xcp;
>>                 + xa_lock_irqsave(&adev->vm_manager.pasids[xcp_id],
>>                 irqflags);
>>                 +     vm = xa_load(&adev->vm_manager.pasids[xcp_id],
>>                 pasid);
>>                        if (vm) {
>>                                root = amdgpu_bo_ref(vm->root.bo);
>>                                is_compute_context =
>>                 vm->is_compute_context;
>>                        } else {
>>                                root = NULL;
>>                        }
>>                 - xa_unlock_irqrestore(&adev->vm_manager.pasids,
>>                 irqflags);
>>                 +
>>                 xa_unlock_irqrestore(&adev->vm_manager.pasids[xcp_id],
>>                 irqflags);
>>
>>                        if (!root)
>>                                return false;
>>                 @@ -2806,11 +2814,11 @@ bool
>>                 amdgpu_vm_handle_fault(struct
>>
>>             amdgpu_device *adev, u32 pasid,
>>
>>                                goto error_unref;
>>
>>                        /* Double check that the VM still exists */
>>                 - xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
>>                 -     vm = xa_load(&adev->vm_manager.pasids, pasid);
>>                 + xa_lock_irqsave(&adev->vm_manager.pasids[xcp_id],
>>                 irqflags);
>>                 +     vm = xa_load(&adev->vm_manager.pasids[xcp_id],
>>                 pasid);
>>                        if (vm && vm->root.bo != root)
>>                                vm = NULL;
>>                 - xa_unlock_irqrestore(&adev->vm_manager.pasids,
>>                 irqflags);
>>                 +
>>                 xa_unlock_irqrestore(&adev->vm_manager.pasids[xcp_id],
>>                 irqflags);
>>                        if (!vm)
>>                                goto error_unlock;
>>
>>                 @@ -2968,14 +2976,15 @@ void
>>                 amdgpu_vm_update_fault_cache(struct
>>
>>             amdgpu_device *adev,
>>
>>                 unsigned int pasid,
>>                                                  uint64_t addr,
>>                                                  uint32_t status,
>>                 -                               unsigned int vmhub)
>>                 +                               unsigned int vmhub,
>>                 +                               uint32_t xcp_id)
>>                    {
>>                        struct amdgpu_vm *vm;
>>                        unsigned long flags;
>>
>>                 - xa_lock_irqsave(&adev->vm_manager.pasids, flags);
>>                 + xa_lock_irqsave(&adev->vm_manager.pasids[xcp_id],
>>                 flags);
>>
>>                 -     vm = xa_load(&adev->vm_manager.pasids, pasid);
>>                 +     vm = xa_load(&adev->vm_manager.pasids[xcp_id],
>>                 pasid);
>>                        /* Don't update the fault cache if status is
>>                 0.  In the multiple
>>                         * fault case, subsequent faults will return a
>>                 0 status which is
>>                         * useless for userspace and replaces the
>>                 useful fault status, so
>>                 @@ -3008,7 +3017,7 @@ void
>>                 amdgpu_vm_update_fault_cache(struct
>>
>>             amdgpu_device *adev,
>>
>>                 WARN_ONCE(1, "Invalid vmhub %u\n", vmhub);
>>                                }
>>                        }
>>                 - xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
>>                 +
>>                 xa_unlock_irqrestore(&adev->vm_manager.pasids[xcp_id],
>>                 flags);
>>                    }
>>
>>                    /**
>>                 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>
>>             b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>
>>                 index 046949c4b695..1499f5f731e9 100644
>>                 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>                 +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>                 @@ -35,6 +35,7 @@
>>                    #include "amdgpu_sync.h"
>>                    #include "amdgpu_ring.h"
>>                    #include "amdgpu_ids.h"
>>                 +#include "amdgpu_xcp.h"
>>
>>                    struct drm_exec;
>>
>>                 @@ -418,6 +419,9 @@ struct amdgpu_vm {
>>
>>                        /* cached fault info */
>>                        struct amdgpu_vm_fault_info fault_info;
>>                 +
>>                 +     /* XCP ID */
>>                 +     int xcp_id;
>>                    };
>>
>>                    struct amdgpu_vm_manager {
>>                 @@ -456,7 +460,7 @@ struct amdgpu_vm_manager {
>>                        /* PASID to VM mapping, will be used in
>>                 interrupt context to
>>                         * look up VM of a page fault
>>                         */
>>                 -     struct xarray                           pasids;
>>                 +     struct xarray pasids[MAX_XCP];
>>                        /* Global registration of recent page fault
>>                 information */
>>                        struct amdgpu_vm_fault_info fault_info;
>>                    };
>>                 @@ -550,7 +554,7 @@ bool
>>                 amdgpu_vm_need_pipeline_sync(struct
>>
>>             amdgpu_ring *ring,
>>
>>                    void amdgpu_vm_check_compute_bug(struct
>>                 amdgpu_device *adev);
>>
>>                    struct amdgpu_task_info *
>>                 -amdgpu_vm_get_task_info_pasid(struct amdgpu_device
>>                 *adev, u32 pasid);
>>                 +amdgpu_vm_get_task_info_pasid(struct amdgpu_device
>>                 *adev, u32 pasid,
>>
>>             u32 xcp_id);
>>
>>                    struct amdgpu_task_info *
>>                    amdgpu_vm_get_task_info_vm(struct amdgpu_vm *vm);
>>                 @@ -649,7 +653,8 @@ void
>>                 amdgpu_vm_update_fault_cache(struct
>>
>>             amdgpu_device *adev,
>>
>>                 unsigned int pasid,
>>                                                  uint64_t addr,
>>                                                  uint32_t status,
>>                 -                               unsigned int vmhub);
>>                 +                               unsigned int vmhub,
>>                 +                               uint32_t xcp_id);
>>                    void amdgpu_vm_tlb_fence_create(struct
>>                 amdgpu_device *adev,
>>                                                 struct amdgpu_vm *vm,
>>                                                 struct dma_fence
>>                 **fence);
>>                 diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>>
>>             b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>>
>>                 index f0ceab3ce5bf..24b042febf5c 100644
>>                 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>>                 +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>>                 @@ -151,7 +151,8 @@ static int
>>                 gmc_v10_0_process_interrupt(struct
>>
>>             amdgpu_device *adev,
>>
>>                 WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
>>
>>                 amdgpu_vm_update_fault_cache(adev, entry->pasid,
>>                 addr, status,
>>                 - entry->vmid_src ? AMDGPU_MMHUB0(0) :
>>
>>             AMDGPU_GFXHUB(0));
>>
>>                 + entry->vmid_src ? AMDGPU_MMHUB0(0) :
>>
>>             AMDGPU_GFXHUB(0),
>>
>>                 + 0);
>>                        }
>>
>>                        if (!printk_ratelimit())
>>                 @@ -161,7 +162,7 @@ static int
>>                 gmc_v10_0_process_interrupt(struct
>>
>>             amdgpu_device *adev,
>>
>>                                "[%s] page fault (src_id:%u ring:%u
>>                 vmid:%u pasid:%u)\n",
>>                                entry->vmid_src ? "mmhub" : "gfxhub",
>>                                entry->src_id, entry->ring_id,
>>                 entry->vmid, entry->pasid);
>>                 -     task_info = amdgpu_vm_get_task_info_pasid(adev,
>>                 entry->pasid);
>>                 +     task_info = amdgpu_vm_get_task_info_pasid(adev,
>>                 entry->pasid, 0);
>>                        if (task_info) {
>>                                dev_err(adev->dev,
>>                                        " in process %s pid %d thread
>>                 %s pid %d\n",
>>                 diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
>>
>>             b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
>>
>>                 index 2797fd84432b..3507046d33e6 100644
>>                 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
>>                 +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
>>                 @@ -122,7 +122,8 @@ static int
>>                 gmc_v11_0_process_interrupt(struct
>>
>>             amdgpu_device *adev,
>>
>>                 WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
>>
>>                 amdgpu_vm_update_fault_cache(adev, entry->pasid,
>>                 addr, status,
>>                 - entry->vmid_src ? AMDGPU_MMHUB0(0) :
>>
>>             AMDGPU_GFXHUB(0));
>>
>>                 + entry->vmid_src ? AMDGPU_MMHUB0(0) :
>>
>>             AMDGPU_GFXHUB(0),
>>
>>                 + 0);
>>                        }
>>
>>                        if (printk_ratelimit()) {
>>                 @@ -132,7 +133,7 @@ static int
>>                 gmc_v11_0_process_interrupt(struct
>>
>>             amdgpu_device *adev,
>>
>>                 "[%s] page fault (src_id:%u ring:%u vmid:%u
>>                 pasid:%u)\n",
>>                                        entry->vmid_src ? "mmhub" :
>>                 "gfxhub",
>>                                        entry->src_id, entry->ring_id,
>>                 entry->vmid, entry->pasid);
>>                 -             task_info =
>>                 amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
>>                 +             task_info =
>>                 amdgpu_vm_get_task_info_pasid(adev, entry->pasid, 0);
>>                                if (task_info) {
>>                                        dev_err(adev->dev,
>>                                                " in process %s pid %d
>>                 thread %s pid %d)\n",
>>                 diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
>>
>>             b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
>>
>>                 index 60acf676000b..9844564c6c74 100644
>>                 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
>>                 +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
>>                 @@ -115,7 +115,8 @@ static int
>>                 gmc_v12_0_process_interrupt(struct
>>
>>             amdgpu_device *adev,
>>
>>                 WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
>>
>>                 amdgpu_vm_update_fault_cache(adev, entry->pasid,
>>                 addr, status,
>>                 - entry->vmid_src ? AMDGPU_MMHUB0(0) :
>>
>>             AMDGPU_GFXHUB(0));
>>
>>                 + entry->vmid_src ? AMDGPU_MMHUB0(0) :
>>
>>             AMDGPU_GFXHUB(0),
>>
>>                 + 0);
>>                        }
>>
>>                        if (printk_ratelimit()) {
>>                 @@ -125,7 +126,7 @@ static int
>>                 gmc_v12_0_process_interrupt(struct
>>
>>             amdgpu_device *adev,
>>
>>                 "[%s] page fault (src_id:%u ring:%u vmid:%u
>>                 pasid:%u)\n",
>>                                        entry->vmid_src ? "mmhub" :
>>                 "gfxhub",
>>                                        entry->src_id, entry->ring_id,
>>                 entry->vmid, entry->pasid);
>>                 -             task_info =
>>                 amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
>>                 +             task_info =
>>                 amdgpu_vm_get_task_info_pasid(adev, entry->pasid, 0);
>>                                if (task_info) {
>>                                        dev_err(adev->dev,
>>                                                " in process %s pid %d
>>                 thread %s pid %d)\n",
>>                 diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>>
>>             b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>>
>>                 index 994432fb57ea..2cdb0cbb7c4d 100644
>>                 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>>                 +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>>                 @@ -1268,7 +1268,8 @@ static int
>>                 gmc_v7_0_process_interrupt(struct
>>
>>             amdgpu_device *adev,
>>
>>                                return 0;
>>
>>                        amdgpu_vm_update_fault_cache(adev, entry->pasid,
>>                 - ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT, status,
>>
>>             AMDGPU_GFXHUB(0));
>>
>>                 + ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT, status,
>>                 + AMDGPU_GFXHUB(0), 0);
>>
>>                        if (amdgpu_vm_fault_stop ==
>>                 AMDGPU_VM_FAULT_STOP_FIRST)
>>                 gmc_v7_0_set_fault_enable_default(adev, false);
>>                 diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>>
>>             b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>>
>>                 index 86488c052f82..6855caeb7f74 100644
>>                 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>>                 +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>>                 @@ -1437,7 +1437,8 @@ static int
>>                 gmc_v8_0_process_interrupt(struct
>>
>>             amdgpu_device *adev,
>>
>>                                return 0;
>>
>>                        amdgpu_vm_update_fault_cache(adev, entry->pasid,
>>                 - ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT, status,
>>
>>             AMDGPU_GFXHUB(0));
>>
>>                 + ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT, status,
>>                 + AMDGPU_GFXHUB(0), 0);
>>
>>                        if (amdgpu_vm_fault_stop ==
>>                 AMDGPU_VM_FAULT_STOP_FIRST)
>>                 gmc_v8_0_set_fault_enable_default(adev, false);
>>                 @@ -1448,7 +1449,7 @@ static int
>>                 gmc_v8_0_process_interrupt(struct
>>
>>             amdgpu_device *adev,
>>
>>                 dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n",
>>                                        entry->src_id,
>>                 entry->src_data[0]);
>>
>>                 -             task_info =
>>                 amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
>>                 +             task_info =
>>                 amdgpu_vm_get_task_info_pasid(adev, entry->pasid, 0);
>>                                if (task_info) {
>>                                        dev_err(adev->dev, " for
>>                 process %s pid %d thread %s pid %d\n",
>>                 task_info->process_name, task_info->tgid,
>>                 diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>
>>             b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>
>>                 index b73136d390cc..e183e08b2c02 100644
>>                 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>                 +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>                 @@ -556,10 +556,12 @@ static int
>>                 gmc_v9_0_process_interrupt(struct
>>
>>             amdgpu_device *adev,
>>
>>                        unsigned int vmhub;
>>                        u64 addr;
>>                        uint32_t cam_index = 0;
>>                 -     int ret, xcc_id = 0;
>>                 -     uint32_t node_id;
>>                 +     int ret;
>>                 +     uint32_t node_id, xcc_id, xcp_id;
>>
>>                        node_id = entry->node_id;
>>                 +     xcc_id = amdgpu_amdkfd_node_id_to_xcc_id(adev,
>>                 node_id);
>>                 +     xcp_id = xcc_id/adev->gfx.num_xcc_per_xcp;
>>
>>                        addr = (u64)entry->src_data[0] << 12;
>>                        addr |= ((u64)entry->src_data[1] & 0xf) << 44;
>>                 @@ -572,12 +574,6 @@ static int
>>                 gmc_v9_0_process_interrupt(struct
>>
>>             amdgpu_device *adev,
>>
>>                                vmhub = AMDGPU_MMHUB1(0);
>>                        } else {
>>                                hub_name = "gfxhub0";
>>                 -             if
>>                 (adev->gfx.funcs->ih_node_to_logical_xcc) {
>>                 -                     xcc_id =
>>                 adev->gfx.funcs->ih_node_to_logical_xcc(adev,
>>                 -                             node_id);
>>                 -                     if (xcc_id < 0)
>>                 -                             xcc_id = 0;
>>                 -             }
>>                                vmhub = xcc_id;
>>                        }
>>                        hub = &adev->vmhub[vmhub];
>>                 @@ -631,7 +627,7 @@ static int
>>                 gmc_v9_0_process_interrupt(struct
>>
>>             amdgpu_device *adev,
>>
>>                 retry_fault ? "retry" : "no-retry",
>>                                entry->src_id, entry->ring_id,
>>                 entry->vmid, entry->pasid);
>>
>>                 -     task_info = amdgpu_vm_get_task_info_pasid(adev,
>>                 entry->pasid);
>>                 +     task_info = amdgpu_vm_get_task_info_pasid(adev,
>>                 entry->pasid,
>>
>>             xcp_id);
>>
>>                        if (task_info) {
>>                                dev_err(adev->dev,
>>                                        " for process %s pid %d thread
>>                 %s pid %d)\n",
>>                 @@ -675,7 +671,7 @@ static int
>>                 gmc_v9_0_process_interrupt(struct
>>
>>             amdgpu_device *adev,
>>
>>                        if (!amdgpu_sriov_vf(adev))
>>                 WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
>>
>>                 -     amdgpu_vm_update_fault_cache(adev,
>>                 entry->pasid, addr, status,
>>
>>             vmhub);
>>
>>                 + amdgpu_vm_update_fault_cache(adev, entry->pasid,
>>                 addr, status,
>>
>>             vmhub, xcp_id);
>>
>>                 dev_err(adev->dev,
>>                 "VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
>>                 diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>>
>>             b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>>
>>                 index 23ef4eb36b40..1ac4224bbe5b 100644
>>                 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>>                 +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>>                 @@ -2182,7 +2182,7 @@ static int
>>                 sdma_v4_0_print_iv_entry(struct
>>
>>             amdgpu_device *adev,
>>
>>                 instance, addr, entry->src_id, entry->ring_id,
>>                 entry->vmid,
>>                                           entry->pasid);
>>
>>                 -     task_info = amdgpu_vm_get_task_info_pasid(adev,
>>                 entry->pasid);
>>                 +     task_info = amdgpu_vm_get_task_info_pasid(adev,
>>                 entry->pasid, 0);
>>                        if (task_info) {
>>                 dev_dbg_ratelimited(adev->dev,
>>                                                    " for process %s
>>                 pid %d thread %s pid %d\n",
>>                 diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
>>
>>             b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
>>
>>                 index 57f16c09abfc..c8b5c0302ca7 100644
>>                 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
>>                 +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
>>                 @@ -1683,6 +1683,8 @@ static int
>>                 sdma_v4_4_2_print_iv_entry(struct
>>
>>             amdgpu_device *adev,
>>
>>                        int instance;
>>                        struct amdgpu_task_info *task_info;
>>                        u64 addr;
>>                 +     uint32_t xcc_id =
>>                 amdgpu_amdkfd_node_id_to_xcc_id(adev,
>>                 entry->node_id);
>>                 +     uint32_t xcp_id =
>>                 xcc_id/adev->gfx.num_xcc_per_xcp;
>>
>>                        instance = sdma_v4_4_2_irq_id_to_seq(adev,
>>                 entry->client_id);
>>                        if (instance < 0 || instance >=
>>                 adev->sdma.num_instances) {
>>                 @@ -1698,7 +1700,7 @@ static int
>>                 sdma_v4_4_2_print_iv_entry(struct
>>
>>             amdgpu_device *adev,
>>
>>                 instance, addr, entry->src_id, entry->ring_id,
>>                 entry->vmid,
>>                                            entry->pasid);
>>
>>                 -     task_info = amdgpu_vm_get_task_info_pasid(adev,
>>                 entry->pasid);
>>                 +     task_info = amdgpu_vm_get_task_info_pasid(adev,
>>                 entry->pasid,
>>
>>             xcp_id);
>>
>>                        if (task_info) {
>>                 dev_dbg_ratelimited(adev->dev, " for process %s pid
>>                 %d thread %s
>>
>>             pid %d\n",
>>
>>                 task_info->process_name, task_info->tgid,
>>                 diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
>>
>>             b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
>>
>>                 index ea3792249209..c098fbaf0e1c 100644
>>                 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
>>                 +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
>>                 @@ -1262,8 +1262,9 @@ void
>>                 kfd_signal_reset_event(struct kfd_node
>>
>>             *dev)
>>
>>                                if (dev->dqm->detect_hang_count) {
>>                                        struct amdgpu_task_info *ti;
>>                 +                     uint32_t xcp_id = dev->xcp ?
>>                 dev->xcp->id : 0;
>>
>>                 -                     ti =
>>                 amdgpu_vm_get_task_info_pasid(dev->adev, p->pasid);
>>                 +                     ti =
>>                 amdgpu_vm_get_task_info_pasid(dev->adev, p->pasid,
>>
>>             xcp_id);
>>
>>                                        if (ti) {
>>                 dev_err(dev->adev->dev,
>>                 "Queues reset on process %s tid %d thread %s pid %d\n",
>>                 diff --git
>>                 a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
>>
>>             b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
>>
>>                 index 8e0d0356e810..d7cbf9525698 100644
>>                 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
>>                 +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
>>                 @@ -377,12 +377,8 @@ static void
>>                 event_interrupt_wq_v10(struct
>>
>>             kfd_node *dev,
>>
>>                                struct kfd_hsa_memory_exception_data
>>                 exception_data;
>>
>>                                /* gfxhub */
>>                 -             if (!vmid_type &&
>>                 dev->adev->gfx.funcs->ih_node_to_logical_xcc) {
>>                 -                     hub_inst =
>>                 dev->adev->gfx.funcs->ih_node_to_logical_xcc(
>>                 dev->adev,
>>                 -                             node_id);
>>                 -                     if (hub_inst < 0)
>>                 -                             hub_inst = 0;
>>                 -             }
>>                 +             if (!vmid_type)
>>                 +                     hub_inst =
>>                 amdgpu_amdkfd_node_id_to_xcc_id(dev->adev,
>>
>>             node_id);
>>
>>                                /* mmhub */
>>                                if (vmid_type && client_id ==
>>                 SOC15_IH_CLIENTID_VMC)
>>                 diff --git
>>                 a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
>>
>>             b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
>>
>>                 index a9c3580be8c9..4708b8c811a5 100644
>>                 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
>>                 +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
>>                 @@ -437,12 +437,8 @@ static void
>>                 event_interrupt_wq_v9(struct
>>
>>             kfd_node *dev,
>>
>>                                struct kfd_hsa_memory_exception_data
>>                 exception_data;
>>
>>                                /* gfxhub */
>>                 -             if (!vmid_type &&
>>                 dev->adev->gfx.funcs->ih_node_to_logical_xcc) {
>>                 -                     hub_inst =
>>                 dev->adev->gfx.funcs->ih_node_to_logical_xcc(
>>                 dev->adev,
>>                 -                             node_id);
>>                 -                     if (hub_inst < 0)
>>                 -                             hub_inst = 0;
>>                 -             }
>>                 +             if (!vmid_type)
>>                 +                     hub_inst =
>>                 amdgpu_amdkfd_node_id_to_xcc_id(dev->adev,
>>
>>             node_id);
>>
>>                                /* mmhub */
>>                                if (vmid_type && client_id ==
>>                 SOC15_IH_CLIENTID_VMC)
>>                 diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
>>
>>             b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
>>
>>                 index ea6a8e43bd5b..b5f2f5b1069c 100644
>>                 --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
>>                 +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
>>                 @@ -251,8 +251,9 @@ void
>>
>>             kfd_smi_event_update_thermal_throttling(struct kfd_node
>>             *dev,
>>
>>                    void kfd_smi_event_update_vmfault(struct kfd_node
>>                 *dev, uint16_t pasid)
>>                    {
>>                        struct amdgpu_task_info *task_info;
>>                 +     uint32_t xcp_id = dev->xcp ? dev->xcp->id : 0;
>>
>>                 -     task_info =
>>                 amdgpu_vm_get_task_info_pasid(dev->adev, pasid);
>>                 +     task_info =
>>                 amdgpu_vm_get_task_info_pasid(dev->adev, pasid, xcp_id);
>>                        if (task_info) {
>>                                /* Report VM faults from user
>>                 applications, not retry from kernel */
>>                                if (task_info->pid)
>>

[-- Attachment #2: Type: text/html, Size: 125045 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] drm/amdkfd: fix vm-pasid lookup for multiple partitions
  2024-09-11  6:54             ` Christian König
@ 2024-09-11 13:40               ` Philip Yang
  2024-09-11 13:58                 ` Christian König
  0 siblings, 1 reply; 11+ messages in thread
From: Philip Yang @ 2024-09-11 13:40 UTC (permalink / raw)
  To: Christian König, Chen, Xiaogang, Kim, Jonathan, Yang, Philip,
	amd-gfx@lists.freedesktop.org
  Cc: Kuehling, Felix, Deucher, Alexander, Joshi, Mukul

[-- Attachment #1: Type: text/html, Size: 130762 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] drm/amdkfd: fix vm-pasid lookup for multiple partitions
  2024-09-11 13:40               ` Philip Yang
@ 2024-09-11 13:58                 ` Christian König
  2024-09-25 19:20                   ` Felix Kuehling
  0 siblings, 1 reply; 11+ messages in thread
From: Christian König @ 2024-09-11 13:58 UTC (permalink / raw)
  To: Philip Yang, Chen, Xiaogang, Kim, Jonathan, Yang, Philip,
	amd-gfx@lists.freedesktop.org
  Cc: Kuehling, Felix, Deucher, Alexander, Joshi, Mukul

[-- Attachment #1: Type: text/plain, Size: 53461 bytes --]

Am 11.09.24 um 15:40 schrieb Philip Yang:
>
> On 2024-09-11 02:54, Christian König wrote:
>
>> Yeah, I completely agree with Xiaogang.
>>
>> The PASID is an identifier of an address space. And the idea of the 
>> KFD was that we can just use the same address space and with it the 
>> page tables for multiple execution devices, e.g. CPUs, GPUs etc...
>>
>> That idea turned out to be a bad one because it clashes with some use 
>> cases (e.g. native context virtualization). The better approach is to 
>> see the CPU and GPU processes as separate things which just share the 
>> same underlying data.
>>
>> Opening the KFD node multiple times currently results in the same KFD 
>> process being used. We should probably consider changing that.
>
> It is one KFD process binding to one app process, with a count to 
> support multiple open/close calls for the same process.
>

Yeah and to repeat myself: That seems to be a bad idea.

> The IOMMU most likely uses Linux process pid, not from kfd 
> process->pasid. The KFD process->pasid is passed to F/W to map queues, 
> flush TLB.
>

Actually no, the IOMMU uses driver assigned PASIDs.

> The reason to replace vm->pasid with KFD process->pasid is to find vm 
> from fault pasid, then for compute vm, find the kfd process from pasid.
>

Why aren't we doing it the other way around? In other words start using 
the VM assigned PASID?

IIRC, having the KFD overwrite the PASID inside the VM was only done for 
the ATC/IOMMU handling, which is now completely gone.

Regards,
Christian.

> I can see a bug in amdgpu_vm_handle_fault, only for compute vm, to 
> force update PTE no-retry-fault to the incorrect VM for multiple 
> partitions. This patch will fix this bug but we can have a simple fix.
>
> Regards,
>
> Philip
>
>>
>> Regards,
>> Christian.
>>
>> Am 11.09.24 um 01:59 schrieb Chen, Xiaogang:
>>>
>>>
>>> You want to have a 1:1 mapping between vm and pasid so you can query 
>>> the vm from the pasid.  I think there is a basic existing issue: we 
>>> cannot have a 1:1 correspondence between vm and pasid.
>>>
>>> PASIDs are global address space identifiers that can be shared 
>>> between the GPU, an IOMMU and the driver. One app should have one 
>>> pasid that the iommu uses to decide which page table to use when a 
>>> device accesses system resources. But one app can open a render/kfd 
>>> node multiple times even for one gpu. That is, one app could have 
>>> multiple GPU vms.
>>>
>>> I think we did not have this issue because an app usually opens a 
>>> render node or kfd node only once. When one adev has multiple 
>>> partitions there are multiple vms on one adev, so we have this issue.  
>>> But the root cause is not multiple partitions, and the solution is not 
>>> to introduce multiple pasids. I think we should have one pasid for one 
>>> app and use a different way to get the vm from the pasid.
>>>
>>>
>>> Regards
>>>
>>> Xiaogang
>>>
>>> On 9/10/2024 3:47 PM, Kim, Jonathan wrote:
>>>>
>>>> [Public]
>>>>
>>>>
>>>>
>>>> 	
>>>> Caution: This message originated from an External Source. Use 
>>>> proper caution when opening attachments, clicking links, or 
>>>> responding.
>>>>
>>>>
>>>> [Public]
>>>>
>>>>
>>>> KMS open still set per pasid-vm bindings per adev (socket) so I 
>>>> don’t see how the per-partition overwrite PASID issue is primarily 
>>>> a KFD concern.
>>>>
>>>> Are you saying the KFD process devices holds a shadow copy of the 
>>>> correct VM during page restore during fault?
>>>>
>>>> Doesn’t it acquire the wrong VM object on process init in the first 
>>>> place?
>>>>
>>>> Even if it were the case the KFD had a separate VM reference, the 
>>>> underlying IRQ fault handling is still broken.
>>>>
>>>> We probably don’t want to bandage over something to fix one symptom.
>>>>
>>>> Jon
>>>>
>>>> *From:*Yang, Philip <Philip.Yang@amd.com>
>>>> *Sent:* Tuesday, September 10, 2024 11:24 AM
>>>> *To:* Koenig, Christian <Christian.Koenig@amd.com>; Kim, Jonathan 
>>>> <Jonathan.Kim@amd.com>; amd-gfx@lists.freedesktop.org
>>>> *Cc:* Kuehling, Felix <Felix.Kuehling@amd.com>; Deucher, Alexander 
>>>> <Alexander.Deucher@amd.com>; Joshi, Mukul <Mukul.Joshi@amd.com>
>>>> *Subject:* Re: [PATCH] drm/amdkfd: fix vm-pasid lookup for multiple 
>>>> partitions
>>>>
>>>> On 2024-09-09 14:46, Christian König wrote:
>>>>
>>>>     Am 09.09.24 um 18:02 schrieb Kim, Jonathan:
>>>>
>>>>         [Public]
>>>>
>>>>
>>>>             -----Original Message-----
>>>>             From: Christian König
>>>>             <ckoenig.leichtzumerken@gmail.com>
>>>>             <mailto:ckoenig.leichtzumerken@gmail.com>
>>>>             Sent: Thursday, September 5, 2024 10:24 AM
>>>>             To: Kim, Jonathan <Jonathan.Kim@amd.com>
>>>>             <mailto:Jonathan.Kim@amd.com>;
>>>>             amd-gfx@lists.freedesktop.org
>>>>             Cc: Kuehling, Felix <Felix.Kuehling@amd.com>
>>>>             <mailto:Felix.Kuehling@amd.com>; Deucher, Alexander
>>>>             <Alexander.Deucher@amd.com>
>>>>             <mailto:Alexander.Deucher@amd.com>; Joshi, Mukul
>>>>             <Mukul.Joshi@amd.com> <mailto:Mukul.Joshi@amd.com>
>>>>             Subject: Re: [PATCH] drm/amdkfd: fix vm-pasid lookup
>>>>             for multiple partitions
>>>>
>>>>             Caution: This message originated from an External
>>>>             Source. Use proper caution
>>>>             when opening attachments, clicking links, or responding.
>>>>
>>>>
>>>>             Am 19.08.24 um 19:59 schrieb Jonathan Kim:
>>>>
>>>>                 Currently multiple partitions will incorrectly
>>>>                 overwrite the VM lookup
>>>>                 table since the table is indexed by PASID and
>>>>                 multiple partitions can
>>>>                 register different VM objects on the same PASID.
>>>>
>>>>             That's a rather bad idea. Why do we have the same PASID
>>>>             for different VM
>>>>             objects in the first place?
>>>>
>>>>         Alex can probably elaborate on the KGD side, but from what
>>>>         I can see, the KMS driver open call has always assigned a
>>>>         new VM object per PASID on an open call.
>>>>         The KFD acquires and replaces the KGD PASID-VMID
>>>>         registration on its own compute process open/creation call.
>>>>         If this is the bad_idea you're referring to, then someone
>>>>         else will have to chime in.  I don't have much history on
>>>>         this unfortunately.
>>>>
>>>>
>>>>     Yeah, Felix and I designed that.
>>>>
>>>> app opens drm node to create vm for each partition, with different 
>>>> vm->pasid for each vm, issue is from kfd_ioctl_acquire_vm -> 
>>>> kfd_process_device_init_vm -> amdgpu_amdkfd_gpuvm_set_vm_pasid, to 
>>>> replace all vm->pasid with kfd process->pasid, which is from open 
>>>> kfd node. This ends up to store only one vm to 
>>>> adev->vm_manager.pasids with KFD process pasid, so we cannot 
>>>> retrieve correct vm from adev->vm_manager.pasids on mGPUs or 
>>>> multiple partitions.
>>>>
>>>>
>>>>
>>>>         That aside, the current problem is that all KFD device
>>>>         structures are logical partitions and register their
>>>>         PASID-VM binding using this concept of a device.
>>>>
>>>>
>>>>     As far as I can see that is the fundamental problem. This needs
>>>>     to be fixed instead.
>>>>
>>>>
>>>>         On the KGD side however, the registration table is
>>>>         maintained in the adev struct, which is a physical socket.
>>>>         So there's a mismatch in understanding of what a device is
>>>>         between the KFD & KGD with regard to the look up table that
>>>>         results in bad bindings.
>>>>
>>>>         Adding a per-partition dimension to the existing lookup
>>>>         table resolves issues we're seeing, for example, with
>>>>         memory violation interception and XNACK, i.e. bad bindings
>>>>         result in wrong vm object found to set no-retry flags on
>>>>         memory violations.
>>>>
>>>> svm_range_restore_pages retry fault recover uses fault pasid to get 
>>>> kfd process, and use the fault node_id to get pdd->vm, maybe you 
>>>> can use this way to fix the debugger issue.
>>>>
>>>> Regards,
>>>>
>>>> Philip
>>>>
>>>>
>>>>     Yeah that is pretty much a no-go.
>>>>
>>>>     The PASID and how it is used is defined by the PCIe
>>>>     specifications. If we now start to assign multiple VMs to the
>>>>     same PASID then we are violating the PCIe specification.
>>>>
>>>>     The problems you see are most likely just the tip of the
>>>>     iceberg here.
>>>>
>>>>     Regards,
>>>>     Christian.
>>>>
>>>>
>>>>
>>>>         Jon
>>>>
>>>>
>>>>             Regards,
>>>>             Christian.
>>>>
>>>>
>>>>                 This results in loading the wrong VM object on
>>>>                 PASID query.
>>>>
>>>>                 To correct this, setup the lookup table to be
>>>>                 per-partition-per-PASID
>>>>                 instead.
>>>>
>>>>                 Signed-off-by: Jonathan Kim <jonathan.kim@amd.com>
>>>>                 <mailto:jonathan.kim@amd.com>
>>>>                 ---
>>>>                 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 12 ++++
>>>>                 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |  1 +
>>>>                 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c |  4 +-
>>>>                 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |  7 ++-
>>>>                 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 55
>>>>                 +++++++++++------
>>>>
>>>>             -- 
>>>>
>>>>                 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 11 +++-
>>>>                 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c |  5 +-
>>>>                 drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c |  5 +-
>>>>                 drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c |  5 +-
>>>>                 drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c |  3 +-
>>>>                 drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c |  5 +-
>>>>                 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 16 ++----
>>>>                 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c |  2 +-
>>>>                 drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c |  4 +-
>>>>                 drivers/gpu/drm/amd/amdkfd/kfd_events.c |  3 +-
>>>>                 .../gpu/drm/amd/amdkfd/kfd_int_process_v10.c |  8 +--
>>>>                 .../gpu/drm/amd/amdkfd/kfd_int_process_v9.c |  8 +--
>>>>                 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c |  3 +-
>>>>                    18 files changed, 92 insertions(+), 65 deletions(-)
>>>>
>>>>                 diff --git
>>>>                 a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>>>
>>>>             b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>>>
>>>>                 index c272461d70a9..28db789610e1 100644
>>>>                 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>>>                 +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>>>                 @@ -887,3 +887,15 @@ int
>>>>                 amdgpu_amdkfd_unmap_hiq(struct
>>>>
>>>>             amdgpu_device *adev, u32 doorbell_off,
>>>>
>>>>                        return r;
>>>>                    }
>>>>                 +
>>>>                 +int amdgpu_amdkfd_node_id_to_xcc_id(struct
>>>>                 amdgpu_device *adev,
>>>>
>>>>             uint32_t node_id)
>>>>
>>>>                 +{
>>>>                 +     if (adev->gfx.funcs->ih_node_to_logical_xcc) {
>>>>                 +             int xcc_id =
>>>>                 adev->gfx.funcs->ih_node_to_logical_xcc(adev,
>>>>                 node_id);
>>>>                 +
>>>>                 +             if (xcc_id >= 0)
>>>>                 +                     return xcc_id;
>>>>                 +     }
>>>>                 +
>>>>                 +     return 0;
>>>>                 +}
>>>>                 diff --git
>>>>                 a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>>>
>>>>             b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>>>
>>>>                 index 4ed49265c764..bf8bb45d8ab6 100644
>>>>                 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>>>                 +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>>>                 @@ -356,6 +356,7 @@ void
>>>>                 amdgpu_amdkfd_unreserve_mem_limit(struct
>>>>
>>>>             amdgpu_device *adev,
>>>>
>>>>                 uint64_t size, u32 alloc_flag, int8_t xcp_id);
>>>>
>>>>                    u64 amdgpu_amdkfd_xcp_memory_size(struct
>>>>                 amdgpu_device *adev, int
>>>>
>>>>             xcp_id);
>>>>
>>>>                 +int amdgpu_amdkfd_node_id_to_xcc_id(struct
>>>>                 amdgpu_device *adev,
>>>>
>>>>             uint32_t node_id);
>>>>
>>>>                    #define KFD_XCP_MEM_ID(adev, xcp_id) \
>>>>                                ((adev)->xcp_mgr && (xcp_id) >= 0 ?\
>>>>                 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>>>>
>>>>             b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>>>>
>>>>                 index c6a1783fc9ef..bf9f8802e18d 100644
>>>>                 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>>>>                 +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>>>>                 @@ -37,7 +37,7 @@ static enum drm_gpu_sched_stat
>>>>
>>>>             amdgpu_job_timedout(struct drm_sched_job *s_job)
>>>>
>>>>                        struct amdgpu_job *job = to_amdgpu_job(s_job);
>>>>                        struct amdgpu_task_info *ti;
>>>>                        struct amdgpu_device *adev = ring->adev;
>>>>                 -     int idx;
>>>>                 +     int idx, xcp_id = !job->vm ? 0 :
>>>>                 job->vm->xcp_id;
>>>>                        int r;
>>>>
>>>>                        if (!drm_dev_enter(adev_to_drm(adev), &idx)) {
>>>>                 @@ -62,7 +62,7 @@ static enum drm_gpu_sched_stat
>>>>
>>>>             amdgpu_job_timedout(struct drm_sched_job *s_job)
>>>>
>>>>                 job->base.sched->name,
>>>>                 atomic_read(&ring->fence_drv.last_seq),
>>>>                 ring->fence_drv.sync_seq);
>>>>
>>>>                 -     ti =
>>>>                 amdgpu_vm_get_task_info_pasid(ring->adev, job->pasid);
>>>>                 +     ti =
>>>>                 amdgpu_vm_get_task_info_pasid(ring->adev,
>>>>                 job->pasid, xcp_id);
>>>>                        if (ti) {
>>>>                                dev_err(adev->dev,
>>>>                                        "Process information:
>>>>                 process %s pid %d thread %s pid %d\n",
>>>>                 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>>>
>>>>             b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>>>
>>>>                 index d9fde38f6ee2..e413bf4a3e84 100644
>>>>                 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>>>                 +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>>>                 @@ -1275,17 +1275,20 @@ int
>>>>                 amdgpu_info_ioctl(struct drm_device *dev,
>>>>
>>>>             void *data, struct drm_file *filp)
>>>>
>>>>                                struct amdgpu_vm *vm = &fpriv->vm;
>>>>                                struct drm_amdgpu_info_gpuvm_fault
>>>>                 gpuvm_fault;
>>>>                                unsigned long flags;
>>>>                 +             int i;
>>>>
>>>>                                if (!vm)
>>>>                                        return -EINVAL;
>>>>
>>>>                                memset(&gpuvm_fault, 0,
>>>>                 sizeof(gpuvm_fault));
>>>>
>>>>                 - xa_lock_irqsave(&adev->vm_manager.pasids, flags);
>>>>                 +             for (i = 0; i <
>>>>                 adev->xcp_mgr->num_xcps; i++)
>>>>                 + xa_lock_irqsave(&adev->vm_manager.pasids[i], flags);
>>>>                                gpuvm_fault.addr = vm->fault_info.addr;
>>>>                                gpuvm_fault.status =
>>>>                 vm->fault_info.status;
>>>>                                gpuvm_fault.vmhub =
>>>>                 vm->fault_info.vmhub;
>>>>                 - xa_unlock_irqrestore(&adev->vm_manager.pasids,
>>>>                 flags);
>>>>                 +             for (i = 0; i <
>>>>                 adev->xcp_mgr->num_xcps; i++)
>>>>                 + xa_unlock_irqrestore(&adev->vm_manager.pasids[i],
>>>>                 flags);
>>>>
>>>>                                return copy_to_user(out, &gpuvm_fault,
>>>>                 min((size_t)size, sizeof(gpuvm_fault))) ? -EFAULT : 0;
>>>>                 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>
>>>>             b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>
>>>>                 index bcb729094521..f43e1c15f423 100644
>>>>                 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>                 +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>                 @@ -146,7 +146,7 @@ int amdgpu_vm_set_pasid(struct
>>>>                 amdgpu_device
>>>>
>>>>             *adev, struct amdgpu_vm *vm,
>>>>
>>>>                                return 0;
>>>>
>>>>                        if (vm->pasid) {
>>>>                 -             r =
>>>>                 xa_err(xa_erase_irq(&adev->vm_manager.pasids,
>>>>                 vm->pasid));
>>>>                 +             r =
>>>>                 xa_err(xa_erase_irq(&adev->vm_manager.pasids[vm->xcp_id],
>>>>
>>>>
>>>>             vm->pasid));
>>>>
>>>>                                if (r < 0)
>>>>                                        return r;
>>>>
>>>>                 @@ -154,7 +154,7 @@ int amdgpu_vm_set_pasid(struct
>>>>                 amdgpu_device
>>>>
>>>>             *adev, struct amdgpu_vm *vm,
>>>>
>>>>                        }
>>>>
>>>>                        if (pasid) {
>>>>                 -             r =
>>>>                 xa_err(xa_store_irq(&adev->vm_manager.pasids,
>>>>                 pasid, vm,
>>>>                 +             r =
>>>>                 xa_err(xa_store_irq(&adev->vm_manager.pasids[vm->xcp_id],
>>>>
>>>>
>>>>             pasid, vm,
>>>>
>>>>                 GFP_KERNEL));
>>>>                                if (r < 0)
>>>>                                        return r;
>>>>                 @@ -2288,14 +2288,14 @@ static void
>>>>
>>>>             amdgpu_vm_destroy_task_info(struct kref *kref)
>>>>
>>>>                    }
>>>>
>>>>                    static inline struct amdgpu_vm *
>>>>                 -amdgpu_vm_get_vm_from_pasid(struct amdgpu_device
>>>>                 *adev, u32 pasid)
>>>>                 +amdgpu_vm_get_vm_from_pasid(struct amdgpu_device
>>>>                 *adev, u32 pasid,
>>>>
>>>>             u32 xcp_id)
>>>>
>>>>                    {
>>>>                        struct amdgpu_vm *vm;
>>>>                        unsigned long flags;
>>>>
>>>>                 - xa_lock_irqsave(&adev->vm_manager.pasids, flags);
>>>>                 -     vm = xa_load(&adev->vm_manager.pasids, pasid);
>>>>                 - xa_unlock_irqrestore(&adev->vm_manager.pasids,
>>>>                 flags);
>>>>                 + xa_lock_irqsave(&adev->vm_manager.pasids[xcp_id],
>>>>                 flags);
>>>>                 +     vm =
>>>>                 xa_load(&adev->vm_manager.pasids[xcp_id], pasid);
>>>>                 +
>>>>                 xa_unlock_irqrestore(&adev->vm_manager.pasids[xcp_id],
>>>>                 flags);
>>>>
>>>>                        return vm;
>>>>                    }
>>>>                 @@ -2343,10 +2343,10 @@
>>>>                 amdgpu_vm_get_task_info_vm(struct
>>>>
>>>>             amdgpu_vm *vm)
>>>>
>>>>                     * referenced down with amdgpu_vm_put_task_info.
>>>>                     */
>>>>                    struct amdgpu_task_info *
>>>>                 -amdgpu_vm_get_task_info_pasid(struct amdgpu_device
>>>>                 *adev, u32 pasid)
>>>>                 +amdgpu_vm_get_task_info_pasid(struct amdgpu_device
>>>>                 *adev, u32 pasid,
>>>>
>>>>             u32 xcp_id)
>>>>
>>>>                    {
>>>>                        return amdgpu_vm_get_task_info_vm(
>>>>                 - amdgpu_vm_get_vm_from_pasid(adev, pasid));
>>>>                 + amdgpu_vm_get_vm_from_pasid(adev, pasid, xcp_id));
>>>>                    }
>>>>
>>>>                    static int amdgpu_vm_create_task_info(struct
>>>>                 amdgpu_vm *vm)
>>>>                 @@ -2481,6 +2481,8 @@ int amdgpu_vm_init(struct
>>>>                 amdgpu_device
>>>>
>>>>             *adev, struct amdgpu_vm *vm,
>>>>
>>>>                 amdgpu_bo_unreserve(vm->root.bo);
>>>>                        amdgpu_bo_unref(&root_bo);
>>>>
>>>>                 +     vm->xcp_id = xcp_id < 0 ? 0 : xcp_id;
>>>>                 +
>>>>                        return 0;
>>>>
>>>>                    error_free_root:
>>>>                 @@ -2695,8 +2697,8 @@ void
>>>>                 amdgpu_vm_manager_init(struct
>>>>
>>>>             amdgpu_device *adev)
>>>>
>>>>                    #else
>>>>                 adev->vm_manager.vm_update_mode = 0;
>>>>                    #endif
>>>>                 -
>>>>                 - xa_init_flags(&adev->vm_manager.pasids,
>>>>                 XA_FLAGS_LOCK_IRQ);
>>>>                 +     for (i = 0; i < MAX_XCP; i++)
>>>>                 + xa_init_flags(&(adev->vm_manager.pasids[i]),
>>>>
>>>>             XA_FLAGS_LOCK_IRQ);
>>>>
>>>>                    }
>>>>
>>>>                    /**
>>>>                 @@ -2708,10 +2710,15 @@ void
>>>>                 amdgpu_vm_manager_init(struct
>>>>
>>>>             amdgpu_device *adev)
>>>>
>>>>                     */
>>>>                    void amdgpu_vm_manager_fini(struct amdgpu_device
>>>>                 *adev)
>>>>                    {
>>>>                 - WARN_ON(!xa_empty(&adev->vm_manager.pasids));
>>>>                 - xa_destroy(&adev->vm_manager.pasids);
>>>>                 +     int i;
>>>>                 +
>>>>                 +     for (i = 0; i < MAX_XCP; i++) {
>>>>                 + WARN_ON(!xa_empty(&adev->vm_manager.pasids[i]));
>>>>                 + xa_destroy(&adev->vm_manager.pasids[i]);
>>>>                 +     }
>>>>
>>>>                        amdgpu_vmid_mgr_fini(adev);
>>>>                 +
>>>>                    }
>>>>
>>>>                    /**
>>>>                 @@ -2778,17 +2785,18 @@ bool
>>>>                 amdgpu_vm_handle_fault(struct
>>>>
>>>>             amdgpu_device *adev, u32 pasid,
>>>>
>>>>                        unsigned long irqflags;
>>>>                        uint64_t value, flags;
>>>>                        struct amdgpu_vm *vm;
>>>>                 -     int r;
>>>>                 +     int r, xcp_id;
>>>>
>>>>                 - xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
>>>>                 -     vm = xa_load(&adev->vm_manager.pasids, pasid);
>>>>                 +     xcp_id =
>>>>                 amdgpu_amdkfd_node_id_to_xcc_id(adev, node_id)/
>>>>                 adev->gfx.num_xcc_per_xcp;
>>>>                 + xa_lock_irqsave(&adev->vm_manager.pasids[xcp_id],
>>>>                 irqflags);
>>>>                 +     vm =
>>>>                 xa_load(&adev->vm_manager.pasids[xcp_id], pasid);
>>>>                        if (vm) {
>>>>                                root = amdgpu_bo_ref(vm->root.bo);
>>>>                                is_compute_context =
>>>>                 vm->is_compute_context;
>>>>                        } else {
>>>>                                root = NULL;
>>>>                        }
>>>>                 - xa_unlock_irqrestore(&adev->vm_manager.pasids,
>>>>                 irqflags);
>>>>                 +
>>>>                 xa_unlock_irqrestore(&adev->vm_manager.pasids[xcp_id],
>>>>                 irqflags);
>>>>
>>>>                        if (!root)
>>>>                                return false;
>>>>                 @@ -2806,11 +2814,11 @@ bool
>>>>                 amdgpu_vm_handle_fault(struct
>>>>
>>>>             amdgpu_device *adev, u32 pasid,
>>>>
>>>>                                goto error_unref;
>>>>
>>>>                        /* Double check that the VM still exists */
>>>>                 - xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
>>>>                 -     vm = xa_load(&adev->vm_manager.pasids, pasid);
>>>>                 + xa_lock_irqsave(&adev->vm_manager.pasids[xcp_id],
>>>>                 irqflags);
>>>>                 +     vm =
>>>>                 xa_load(&adev->vm_manager.pasids[xcp_id], pasid);
>>>>                        if (vm && vm->root.bo != root)
>>>>                                vm = NULL;
>>>>                 - xa_unlock_irqrestore(&adev->vm_manager.pasids,
>>>>                 irqflags);
>>>>                 +
>>>>                 xa_unlock_irqrestore(&adev->vm_manager.pasids[xcp_id],
>>>>                 irqflags);
>>>>                        if (!vm)
>>>>                                goto error_unlock;
>>>>
>>>>                 @@ -2968,14 +2976,15 @@ void
>>>>                 amdgpu_vm_update_fault_cache(struct
>>>>
>>>>             amdgpu_device *adev,
>>>>
>>>>                 unsigned int pasid,
>>>>                 uint64_t addr,
>>>>                 uint32_t status,
>>>>                 -                               unsigned int vmhub)
>>>>                 +                               unsigned int vmhub,
>>>>                 +                               uint32_t xcp_id)
>>>>                    {
>>>>                        struct amdgpu_vm *vm;
>>>>                        unsigned long flags;
>>>>
>>>>                 - xa_lock_irqsave(&adev->vm_manager.pasids, flags);
>>>>                 + xa_lock_irqsave(&adev->vm_manager.pasids[xcp_id],
>>>>                 flags);
>>>>
>>>>                 -     vm = xa_load(&adev->vm_manager.pasids, pasid);
>>>>                 +     vm =
>>>>                 xa_load(&adev->vm_manager.pasids[xcp_id], pasid);
>>>>                        /* Don't update the fault cache if status is
>>>>                 0.  In the multiple
>>>>                         * fault case, subsequent faults will return
>>>>                 a 0 status which is
>>>>                         * useless for userspace and replaces the
>>>>                 useful fault status, so
>>>>                 @@ -3008,7 +3017,7 @@ void
>>>>                 amdgpu_vm_update_fault_cache(struct
>>>>
>>>>             amdgpu_device *adev,
>>>>
>>>>                 WARN_ONCE(1, "Invalid vmhub %u\n", vmhub);
>>>>                                }
>>>>                        }
>>>>                 - xa_unlock_irqrestore(&adev->vm_manager.pasids,
>>>>                 flags);
>>>>                 +
>>>>                 xa_unlock_irqrestore(&adev->vm_manager.pasids[xcp_id],
>>>>                 flags);
>>>>                    }
>>>>
>>>>                    /**
>>>>                 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>
>>>>             b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>
>>>>                 index 046949c4b695..1499f5f731e9 100644
>>>>                 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>                 +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>                 @@ -35,6 +35,7 @@
>>>>                    #include "amdgpu_sync.h"
>>>>                    #include "amdgpu_ring.h"
>>>>                    #include "amdgpu_ids.h"
>>>>                 +#include "amdgpu_xcp.h"
>>>>
>>>>                    struct drm_exec;
>>>>
>>>>                 @@ -418,6 +419,9 @@ struct amdgpu_vm {
>>>>
>>>>                        /* cached fault info */
>>>>                        struct amdgpu_vm_fault_info fault_info;
>>>>                 +
>>>>                 +     /* XCP ID */
>>>>                 +     int xcp_id;
>>>>                    };
>>>>
>>>>                    struct amdgpu_vm_manager {
>>>>                 @@ -456,7 +460,7 @@ struct amdgpu_vm_manager {
>>>>                        /* PASID to VM mapping, will be used in
>>>>                 interrupt context to
>>>>                         * look up VM of a page fault
>>>>                         */
>>>>                 -     struct xarray                           pasids;
>>>>                 +     struct xarray pasids[MAX_XCP];
>>>>                        /* Global registration of recent page fault
>>>>                 information */
>>>>                        struct amdgpu_vm_fault_info fault_info;
>>>>                    };
>>>>                 @@ -550,7 +554,7 @@ bool
>>>>                 amdgpu_vm_need_pipeline_sync(struct
>>>>
>>>>             amdgpu_ring *ring,
>>>>
>>>>                    void amdgpu_vm_check_compute_bug(struct
>>>>                 amdgpu_device *adev);
>>>>
>>>>                    struct amdgpu_task_info *
>>>>                 -amdgpu_vm_get_task_info_pasid(struct amdgpu_device
>>>>                 *adev, u32 pasid);
>>>>                 +amdgpu_vm_get_task_info_pasid(struct amdgpu_device
>>>>                 *adev, u32 pasid,
>>>>
>>>>             u32 xcp_id);
>>>>
>>>>                    struct amdgpu_task_info *
>>>>                    amdgpu_vm_get_task_info_vm(struct amdgpu_vm *vm);
>>>>                 @@ -649,7 +653,8 @@ void
>>>>                 amdgpu_vm_update_fault_cache(struct
>>>>
>>>>             amdgpu_device *adev,
>>>>
>>>>                 unsigned int pasid,
>>>>                 uint64_t addr,
>>>>                 uint32_t status,
>>>>                 -                               unsigned int vmhub);
>>>>                 +                               unsigned int vmhub,
>>>>                 +                               uint32_t xcp_id);
>>>>                    void amdgpu_vm_tlb_fence_create(struct
>>>>                 amdgpu_device *adev,
>>>>                                                 struct amdgpu_vm *vm,
>>>>                                                 struct dma_fence
>>>>                 **fence);
>>>>                 diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>>>>
>>>>             b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>>>>
>>>>                 index f0ceab3ce5bf..24b042febf5c 100644
>>>>                 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>>>>                 +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>>>>                 @@ -151,7 +151,8 @@ static int
>>>>                 gmc_v10_0_process_interrupt(struct
>>>>
>>>>             amdgpu_device *adev,
>>>>
>>>>                 WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
>>>>
>>>>                 amdgpu_vm_update_fault_cache(adev, entry->pasid,
>>>>                 addr, status,
>>>>                 -                                         
>>>>                 entry->vmid_src ? AMDGPU_MMHUB0(0) :
>>>>
>>>>             AMDGPU_GFXHUB(0));
>>>>
>>>>                 + entry->vmid_src ? AMDGPU_MMHUB0(0) :
>>>>
>>>>             AMDGPU_GFXHUB(0),
>>>>
>>>>                 + 0);
>>>>                        }
>>>>
>>>>                        if (!printk_ratelimit())
>>>>                 @@ -161,7 +162,7 @@ static int
>>>>                 gmc_v10_0_process_interrupt(struct
>>>>
>>>>             amdgpu_device *adev,
>>>>
>>>>                                "[%s] page fault (src_id:%u ring:%u
>>>>                 vmid:%u pasid:%u)\n",
>>>>                                entry->vmid_src ? "mmhub" : "gfxhub",
>>>>                                entry->src_id, entry->ring_id,
>>>>                 entry->vmid, entry->pasid);
>>>>                 -     task_info =
>>>>                 amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
>>>>                 +     task_info =
>>>>                 amdgpu_vm_get_task_info_pasid(adev, entry->pasid, 0);
>>>>                        if (task_info) {
>>>>                                dev_err(adev->dev,
>>>>                                        " in process %s pid %d
>>>>                 thread %s pid %d\n",
>>>>                 diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
>>>>
>>>>             b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
>>>>
>>>>                 index 2797fd84432b..3507046d33e6 100644
>>>>                 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
>>>>                 +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
>>>>                 @@ -122,7 +122,8 @@ static int
>>>>                 gmc_v11_0_process_interrupt(struct
>>>>
>>>>             amdgpu_device *adev,
>>>>
>>>>                 WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
>>>>
>>>>                 amdgpu_vm_update_fault_cache(adev, entry->pasid,
>>>>                 addr, status,
>>>>                 -                                         
>>>>                 entry->vmid_src ? AMDGPU_MMHUB0(0) :
>>>>
>>>>             AMDGPU_GFXHUB(0));
>>>>
>>>>                 + entry->vmid_src ? AMDGPU_MMHUB0(0) :
>>>>
>>>>             AMDGPU_GFXHUB(0),
>>>>
>>>>                 + 0);
>>>>                        }
>>>>
>>>>                        if (printk_ratelimit()) {
>>>>                 @@ -132,7 +133,7 @@ static int
>>>>                 gmc_v11_0_process_interrupt(struct
>>>>
>>>>             amdgpu_device *adev,
>>>>
>>>>                 "[%s] page fault (src_id:%u ring:%u vmid:%u
>>>>                 pasid:%u)\n",
>>>>                 entry->vmid_src ? "mmhub" : "gfxhub",
>>>>                                        entry->src_id,
>>>>                 entry->ring_id, entry->vmid, entry->pasid);
>>>>                 -             task_info =
>>>>                 amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
>>>>                 +             task_info =
>>>>                 amdgpu_vm_get_task_info_pasid(adev, entry->pasid, 0);
>>>>                                if (task_info) {
>>>>                 dev_err(adev->dev,
>>>>                                                " in process %s pid
>>>>                 %d thread %s pid %d)\n",
>>>>                 diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
>>>>
>>>>             b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
>>>>
>>>>                 index 60acf676000b..9844564c6c74 100644
>>>>                 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
>>>>                 +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
>>>>                 @@ -115,7 +115,8 @@ static int
>>>>                 gmc_v12_0_process_interrupt(struct
>>>>
>>>>             amdgpu_device *adev,
>>>>
>>>>                 WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
>>>>
>>>>                 amdgpu_vm_update_fault_cache(adev, entry->pasid,
>>>>                 addr, status,
>>>>                 -                                         
>>>>                 entry->vmid_src ? AMDGPU_MMHUB0(0) :
>>>>
>>>>             AMDGPU_GFXHUB(0));
>>>>
>>>>                 + entry->vmid_src ? AMDGPU_MMHUB0(0) :
>>>>
>>>>             AMDGPU_GFXHUB(0),
>>>>
>>>>                 + 0);
>>>>                        }
>>>>
>>>>                        if (printk_ratelimit()) {
>>>>                 @@ -125,7 +126,7 @@ static int
>>>>                 gmc_v12_0_process_interrupt(struct
>>>>
>>>>             amdgpu_device *adev,
>>>>
>>>>                 "[%s] page fault (src_id:%u ring:%u vmid:%u
>>>>                 pasid:%u)\n",
>>>>                 entry->vmid_src ? "mmhub" : "gfxhub",
>>>>                                        entry->src_id,
>>>>                 entry->ring_id, entry->vmid, entry->pasid);
>>>>                 -             task_info =
>>>>                 amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
>>>>                 +             task_info =
>>>>                 amdgpu_vm_get_task_info_pasid(adev, entry->pasid, 0);
>>>>                                if (task_info) {
>>>>                 dev_err(adev->dev,
>>>>                                                " in process %s pid
>>>>                 %d thread %s pid %d)\n",
>>>>                 diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>>>>
>>>>             b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>>>>
>>>>                 index 994432fb57ea..2cdb0cbb7c4d 100644
>>>>                 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>>>>                 +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>>>>                 @@ -1268,7 +1268,8 @@ static int
>>>>                 gmc_v7_0_process_interrupt(struct
>>>>
>>>>             amdgpu_device *adev,
>>>>
>>>>                                return 0;
>>>>
>>>>                 amdgpu_vm_update_fault_cache(adev, entry->pasid,
>>>>                 - ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT, status,
>>>>
>>>>             AMDGPU_GFXHUB(0));
>>>>
>>>>                 + ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT, status,
>>>>                 + AMDGPU_GFXHUB(0), 0);
>>>>
>>>>                        if (amdgpu_vm_fault_stop ==
>>>>                 AMDGPU_VM_FAULT_STOP_FIRST)
>>>>                 gmc_v7_0_set_fault_enable_default(adev, false);
>>>>                 diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>>>>
>>>>             b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>>>>
>>>>                 index 86488c052f82..6855caeb7f74 100644
>>>>                 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>>>>                 +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>>>>                 @@ -1437,7 +1437,8 @@ static int
>>>>                 gmc_v8_0_process_interrupt(struct
>>>>
>>>>             amdgpu_device *adev,
>>>>
>>>>                                return 0;
>>>>
>>>>                 amdgpu_vm_update_fault_cache(adev, entry->pasid,
>>>>                 - ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT, status,
>>>>
>>>>             AMDGPU_GFXHUB(0));
>>>>
>>>>                 + ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT, status,
>>>>                 + AMDGPU_GFXHUB(0), 0);
>>>>
>>>>                        if (amdgpu_vm_fault_stop ==
>>>>                 AMDGPU_VM_FAULT_STOP_FIRST)
>>>>                 gmc_v8_0_set_fault_enable_default(adev, false);
>>>>                 @@ -1448,7 +1449,7 @@ static int
>>>>                 gmc_v8_0_process_interrupt(struct
>>>>
>>>>             amdgpu_device *adev,
>>>>
>>>>                 dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n",
>>>>                                        entry->src_id,
>>>>                 entry->src_data[0]);
>>>>
>>>>                 -             task_info =
>>>>                 amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
>>>>                 +             task_info =
>>>>                 amdgpu_vm_get_task_info_pasid(adev, entry->pasid, 0);
>>>>                                if (task_info) {
>>>>                 dev_err(adev->dev, " for process %s pid %d thread
>>>>                 %s pid %d\n",
>>>>                 task_info->process_name, task_info->tgid,
>>>>                 diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>>>
>>>>             b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>>>
>>>>                 index b73136d390cc..e183e08b2c02 100644
>>>>                 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>>>                 +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>>>                 @@ -556,10 +556,12 @@ static int
>>>>                 gmc_v9_0_process_interrupt(struct
>>>>
>>>>             amdgpu_device *adev,
>>>>
>>>>                        unsigned int vmhub;
>>>>                        u64 addr;
>>>>                        uint32_t cam_index = 0;
>>>>                 -     int ret, xcc_id = 0;
>>>>                 -     uint32_t node_id;
>>>>                 +     int ret;
>>>>                 +     uint32_t node_id, xcc_id, xcp_id;
>>>>
>>>>                        node_id = entry->node_id;
>>>>                 +     xcc_id =
>>>>                 amdgpu_amdkfd_node_id_to_xcc_id(adev, node_id);
>>>>                 +     xcp_id = xcc_id/adev->gfx.num_xcc_per_xcp;
>>>>
>>>>                        addr = (u64)entry->src_data[0] << 12;
>>>>                        addr |= ((u64)entry->src_data[1] & 0xf) << 44;
>>>>                 @@ -572,12 +574,6 @@ static int
>>>>                 gmc_v9_0_process_interrupt(struct
>>>>
>>>>             amdgpu_device *adev,
>>>>
>>>>                                vmhub = AMDGPU_MMHUB1(0);
>>>>                        } else {
>>>>                                hub_name = "gfxhub0";
>>>>                 -             if
>>>>                 (adev->gfx.funcs->ih_node_to_logical_xcc) {
>>>>                 -                     xcc_id =
>>>>                 adev->gfx.funcs->ih_node_to_logical_xcc(adev,
>>>>                 -                             node_id);
>>>>                 -                     if (xcc_id < 0)
>>>>                 -                             xcc_id = 0;
>>>>                 -             }
>>>>                                vmhub = xcc_id;
>>>>                        }
>>>>                        hub = &adev->vmhub[vmhub];
>>>>                 @@ -631,7 +627,7 @@ static int
>>>>                 gmc_v9_0_process_interrupt(struct
>>>>
>>>>             amdgpu_device *adev,
>>>>
>>>>                 retry_fault ? "retry" : "no-retry",
>>>>                                entry->src_id, entry->ring_id,
>>>>                 entry->vmid, entry->pasid);
>>>>
>>>>                 -     task_info =
>>>>                 amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
>>>>                 +     task_info =
>>>>                 amdgpu_vm_get_task_info_pasid(adev, entry->pasid,
>>>>
>>>>             xcp_id);
>>>>
>>>>                        if (task_info) {
>>>>                                dev_err(adev->dev,
>>>>                                        " for process %s pid %d
>>>>                 thread %s pid %d)\n",
>>>>                 @@ -675,7 +671,7 @@ static int
>>>>                 gmc_v9_0_process_interrupt(struct
>>>>
>>>>             amdgpu_device *adev,
>>>>
>>>>                        if (!amdgpu_sriov_vf(adev))
>>>>                 WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
>>>>
>>>>                 -     amdgpu_vm_update_fault_cache(adev,
>>>>                 entry->pasid, addr, status,
>>>>
>>>>             vmhub);
>>>>
>>>>                 + amdgpu_vm_update_fault_cache(adev, entry->pasid,
>>>>                 addr, status,
>>>>
>>>>             vmhub, xcp_id);
>>>>
>>>>                 dev_err(adev->dev,
>>>>                 "VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
>>>>                 diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>>>>
>>>>             b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>>>>
>>>>                 index 23ef4eb36b40..1ac4224bbe5b 100644
>>>>                 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>>>>                 +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>>>>                 @@ -2182,7 +2182,7 @@ static int
>>>>                 sdma_v4_0_print_iv_entry(struct
>>>>
>>>>             amdgpu_device *adev,
>>>>
>>>>                 instance, addr, entry->src_id, entry->ring_id,
>>>>                 entry->vmid,
>>>>                 entry->pasid);
>>>>
>>>>                 -     task_info =
>>>>                 amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
>>>>                 +     task_info =
>>>>                 amdgpu_vm_get_task_info_pasid(adev, entry->pasid, 0);
>>>>                        if (task_info) {
>>>>                 dev_dbg_ratelimited(adev->dev,
>>>>                                                    " for process %s
>>>>                 pid %d thread %s pid %d\n",
>>>>                 diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
>>>>
>>>>             b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
>>>>
>>>>                 index 57f16c09abfc..c8b5c0302ca7 100644
>>>>                 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
>>>>                 +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
>>>>                 @@ -1683,6 +1683,8 @@ static int
>>>>                 sdma_v4_4_2_print_iv_entry(struct
>>>>
>>>>             amdgpu_device *adev,
>>>>
>>>>                        int instance;
>>>>                        struct amdgpu_task_info *task_info;
>>>>                        u64 addr;
>>>>                 +     uint32_t xcc_id =
>>>>                 amdgpu_amdkfd_node_id_to_xcc_id(adev,
>>>>                 entry->node_id);
>>>>                 +     uint32_t xcp_id =
>>>>                 xcc_id/adev->gfx.num_xcc_per_xcp;
>>>>
>>>>                        instance = sdma_v4_4_2_irq_id_to_seq(adev,
>>>>                 entry->client_id);
>>>>                        if (instance < 0 || instance >=
>>>>                 adev->sdma.num_instances) {
>>>>                 @@ -1698,7 +1700,7 @@ static int
>>>>                 sdma_v4_4_2_print_iv_entry(struct
>>>>
>>>>             amdgpu_device *adev,
>>>>
>>>>                 instance, addr, entry->src_id, entry->ring_id,
>>>>                 entry->vmid,
>>>>                 entry->pasid);
>>>>
>>>>                 -     task_info =
>>>>                 amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
>>>>                 +     task_info =
>>>>                 amdgpu_vm_get_task_info_pasid(adev, entry->pasid,
>>>>
>>>>             xcp_id);
>>>>
>>>>                        if (task_info) {
>>>>                 dev_dbg_ratelimited(adev->dev, " for process %s pid
>>>>                 %d thread %s
>>>>
>>>>             pid %d\n",
>>>>
>>>>                 task_info->process_name, task_info->tgid,
>>>>                 diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
>>>>
>>>>             b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
>>>>
>>>>                 index ea3792249209..c098fbaf0e1c 100644
>>>>                 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
>>>>                 +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
>>>>                 @@ -1262,8 +1262,9 @@ void
>>>>                 kfd_signal_reset_event(struct kfd_node
>>>>
>>>>             *dev)
>>>>
>>>>                                if (dev->dqm->detect_hang_count) {
>>>>                                        struct amdgpu_task_info *ti;
>>>>                 +                     uint32_t xcp_id = dev->xcp ?
>>>>                 dev->xcp->id : 0;
>>>>
>>>>                 -                     ti =
>>>>                 amdgpu_vm_get_task_info_pasid(dev->adev, p->pasid);
>>>>                 +                     ti =
>>>>                 amdgpu_vm_get_task_info_pasid(dev->adev, p->pasid,
>>>>
>>>>             xcp_id);
>>>>
>>>>                 if (ti) {
>>>>                 dev_err(dev->adev->dev,
>>>>                 "Queues reset on process %s tid %d thread %s pid
>>>>                 %d\n",
>>>>                 diff --git
>>>>                 a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
>>>>
>>>>             b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
>>>>
>>>>                 index 8e0d0356e810..d7cbf9525698 100644
>>>>                 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
>>>>                 +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
>>>>                 @@ -377,12 +377,8 @@ static void
>>>>                 event_interrupt_wq_v10(struct
>>>>
>>>>             kfd_node *dev,
>>>>
>>>>                                struct kfd_hsa_memory_exception_data
>>>>                 exception_data;
>>>>
>>>>                                /* gfxhub */
>>>>                 -             if (!vmid_type &&
>>>>                 dev->adev->gfx.funcs->ih_node_to_logical_xcc) {
>>>>                 -                     hub_inst =
>>>>                 dev->adev->gfx.funcs->ih_node_to_logical_xcc(
>>>>                 dev->adev,
>>>>                 -                             node_id);
>>>>                 -                     if (hub_inst < 0)
>>>>                 -                             hub_inst = 0;
>>>>                 -             }
>>>>                 +             if (!vmid_type)
>>>>                 +                     hub_inst =
>>>>                 amdgpu_amdkfd_node_id_to_xcc_id(dev->adev,
>>>>
>>>>             node_id);
>>>>
>>>>                                /* mmhub */
>>>>                                if (vmid_type && client_id ==
>>>>                 SOC15_IH_CLIENTID_VMC)
>>>>                 diff --git
>>>>                 a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
>>>>
>>>>             b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
>>>>
>>>>                 index a9c3580be8c9..4708b8c811a5 100644
>>>>                 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
>>>>                 +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
>>>>                 @@ -437,12 +437,8 @@ static void
>>>>                 event_interrupt_wq_v9(struct
>>>>
>>>>             kfd_node *dev,
>>>>
>>>>                                struct kfd_hsa_memory_exception_data
>>>>                 exception_data;
>>>>
>>>>                                /* gfxhub */
>>>>                 -             if (!vmid_type &&
>>>>                 dev->adev->gfx.funcs->ih_node_to_logical_xcc) {
>>>>                 -                     hub_inst =
>>>>                 dev->adev->gfx.funcs->ih_node_to_logical_xcc(
>>>>                 dev->adev,
>>>>                 -                             node_id);
>>>>                 -                     if (hub_inst < 0)
>>>>                 -                             hub_inst = 0;
>>>>                 -             }
>>>>                 +             if (!vmid_type)
>>>>                 +                     hub_inst =
>>>>                 amdgpu_amdkfd_node_id_to_xcc_id(dev->adev,
>>>>
>>>>             node_id);
>>>>
>>>>                                /* mmhub */
>>>>                                if (vmid_type && client_id ==
>>>>                 SOC15_IH_CLIENTID_VMC)
>>>>                 diff --git
>>>>                 a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
>>>>
>>>>             b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
>>>>
>>>>                 index ea6a8e43bd5b..b5f2f5b1069c 100644
>>>>                 --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
>>>>                 +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
>>>>                 @@ -251,8 +251,9 @@ void
>>>>
>>>>             kfd_smi_event_update_thermal_throttling(struct kfd_node
>>>>             *dev,
>>>>
>>>>                    void kfd_smi_event_update_vmfault(struct
>>>>                 kfd_node *dev, uint16_t pasid)
>>>>                    {
>>>>                        struct amdgpu_task_info *task_info;
>>>>                 +     uint32_t xcp_id = dev->xcp ? dev->xcp->id : 0;
>>>>
>>>>                 -     task_info =
>>>>                 amdgpu_vm_get_task_info_pasid(dev->adev, pasid);
>>>>                 +     task_info =
>>>>                 amdgpu_vm_get_task_info_pasid(dev->adev, pasid,
>>>>                 xcp_id);
>>>>                        if (task_info) {
>>>>                                /* Report VM faults from user
>>>>                 applications, not retry from kernel */
>>>>                                if (task_info->pid)
>>>>
>>

[-- Attachment #2: Type: text/html, Size: 136948 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] drm/amdkfd: fix vm-pasid lookup for multiple partitions
  2024-09-11 13:58                 ` Christian König
@ 2024-09-25 19:20                   ` Felix Kuehling
  0 siblings, 0 replies; 11+ messages in thread
From: Felix Kuehling @ 2024-09-25 19:20 UTC (permalink / raw)
  To: Christian König, Philip Yang, Chen, Xiaogang, Kim, Jonathan,
	Yang, Philip, amd-gfx@lists.freedesktop.org
  Cc: Deucher, Alexander, Joshi, Mukul

I sort of agree that KFD allocated one PASID per process for historical 
reasons. KFD used one address space per process that was shared by all 
GPUs and the CPU, so it seemed wasteful to allocate multiple PASIDs for 
the same process.

On the other hand, I also don't see why you can't use the same PASID on 
multiple devices, even if they do refer to different address spaces. If 
the PASID is a per-device address space ID, that should be perfectly 
fine. It doesn't need to be globally unique. If it was globally unique, 
the adev->vm_manager.pasids table should actually be global, not in the 
adev.

That said, it should be doable to change KFD to use per-VM PASIDs. 
PASIDs are not exposed in user mode, so user mode APIs wouldn't be 
affected. Some debugfs stuff and dmesg logs would change. And we'd need 
to move kfd_process->pasid to pdd->pasids. This is probably a bigger 
change than what Jon proposed in his patch, though.

Regards,
   Felix


On 2024-09-11 09:58, Christian König wrote:
> Am 11.09.24 um 15:40 schrieb Philip Yang:
>>
>> On 2024-09-11 02:54, Christian König wrote:
>>
>>> Yeah, I completely agree with Xiaogang.
>>>
>>> The PASID is an identifier of an address space. And the idea of the 
>>> KFD was that we can just use the same address space and with it the 
>>> page tables for multiple execution devices, e.g. CPUs, GPUs etc...
>>>
>>> That idea turned out to be a bad one because it clashes with some 
>>> use cases (e.g. native context virtualization). The better approach 
>>> is to see the CPU and GPU processes as separate things which just 
>>> share the same underlying data.
>>>
>>> Opening the KFD node multiple times currently results in the same 
>>> KFD process being used. We should probably consider changing that.
>>
>> It is one KFD process binding to one app process, with a count to 
>> support multiple open/close for the same process.
>>
>
> Yeah and to repeat myself: That seems to be a bad idea.
>
>> The IOMMU most likely uses Linux process pid, not from kfd 
>> process->pasid. The KFD process->pasid is passed to F/W to map 
>> queues, flush TLB.
>>
>
> Actually no, the IOMMU uses driver assigned PASIDs.
>
>> The reason to replace vm->pasid with KFD process->pasid is to find vm 
>> from fault pasid, then for compute vm, find the kfd process from pasid.
>>
>
> Why aren't we doing it the other way around? In other words start 
> using the VM assigned PASID?
>
> IIRC the KFD overwriting the PASID inside the VM was only done for 
> the ATC/IOMMU handling, which is now completely gone.
>
> Regards,
> Christian.
>
>> I can see a bug in amdgpu_vm_handle_fault, only for compute vm, to 
>> force update PTE no-retry-fault to the incorrect VM for multiple 
>> partitions. This patch will fix this bug but we can have a simple fix.
>>
>> Regards,
>>
>> Philip
>>
>>>
>>> Regards,
>>> Christian.
>>>
>>> Am 11.09.24 um 01:59 schrieb Chen, Xiaogang:
>>>>
>>>>
>>>> You want to have a 1:1 mapping between vm and pasid so you can query 
>>>> the vm from the pasid.  I think there is a basic existing issue: we 
>>>> cannot have a 1:1 correspondence between vm and pasid.
>>>>
>>>> PASIDs are global address space identifiers that can be shared 
>>>> between the GPU, an IOMMU and the driver. One app should have one 
>>>> pasid that iommu uses to decide which page table to use when device 
>>>> access system resource. But one app can open render/kfd node 
>>>> multiple times even for one gpu. That said one app could have 
>>>> multiple GPU vms .
>>>>
>>>> I think we did not have this issue because an app usually opens a 
>>>> render node or kfd node only once. When one adev has multiple 
>>>> partitions there are multiple vms on one adev, so we have this issue.  
>>>> But the root cause is not multiple partitions, and the solution is not 
>>>> to introduce multiple pasids. I think we should have one pasid for one 
>>>> app and use a different way to get the vm from the pasid.
>>>>
>>>>
>>>> Regards
>>>>
>>>> Xiaogang
>>>>
>>>> On 9/10/2024 3:47 PM, Kim, Jonathan wrote:
>>>>>
>>>>> [Public]
>>>>>
>>>>>
>>>>>
>>>>> 	
>>>>> Caution: This message originated from an External Source. Use 
>>>>> proper caution when opening attachments, clicking links, or 
>>>>> responding.
>>>>>
>>>>>
>>>>> [Public]
>>>>>
>>>>>
>>>>> KMS open still sets pasid-vm bindings per adev (socket), so I 
>>>>> don’t see how the per-partition PASID overwrite issue is primarily 
>>>>> a KFD concern.
>>>>>
>>>>> Are you saying the KFD process devices holds a shadow copy of the 
>>>>> correct VM during page restore during fault?
>>>>>
>>>>> Doesn’t it acquire the wrong VM object on process init in the 
>>>>> first place?
>>>>>
>>>>> Even if it were the case the KFD had a separate VM reference, the 
>>>>> underlying IRQ fault handling is still broken.
>>>>>
>>>>> We probably don’t want to bandage over something to fix one symptom.
>>>>>
>>>>> Jon
>>>>>
>>>>> *From:*Yang, Philip <Philip.Yang@amd.com>
>>>>> *Sent:* Tuesday, September 10, 2024 11:24 AM
>>>>> *To:* Koenig, Christian <Christian.Koenig@amd.com>; Kim, Jonathan 
>>>>> <Jonathan.Kim@amd.com>; amd-gfx@lists.freedesktop.org
>>>>> *Cc:* Kuehling, Felix <Felix.Kuehling@amd.com>; Deucher, Alexander 
>>>>> <Alexander.Deucher@amd.com>; Joshi, Mukul <Mukul.Joshi@amd.com>
>>>>> *Subject:* Re: [PATCH] drm/amdkfd: fix vm-pasid lookup for 
>>>>> multiple partitions
>>>>>
>>>>> On 2024-09-09 14:46, Christian König wrote:
>>>>>
>>>>>     Am 09.09.24 um 18:02 schrieb Kim, Jonathan:
>>>>>
>>>>>         [Public]
>>>>>
>>>>>
>>>>>             -----Original Message-----
>>>>>             From: Christian König
>>>>>             <ckoenig.leichtzumerken@gmail.com>
>>>>>             <mailto:ckoenig.leichtzumerken@gmail.com>
>>>>>             Sent: Thursday, September 5, 2024 10:24 AM
>>>>>             To: Kim, Jonathan <Jonathan.Kim@amd.com>
>>>>>             <mailto:Jonathan.Kim@amd.com>;
>>>>>             amd-gfx@lists.freedesktop.org
>>>>>             Cc: Kuehling, Felix <Felix.Kuehling@amd.com>
>>>>>             <mailto:Felix.Kuehling@amd.com>; Deucher, Alexander
>>>>>             <Alexander.Deucher@amd.com>
>>>>>             <mailto:Alexander.Deucher@amd.com>; Joshi, Mukul
>>>>>             <Mukul.Joshi@amd.com> <mailto:Mukul.Joshi@amd.com>
>>>>>             Subject: Re: [PATCH] drm/amdkfd: fix vm-pasid lookup
>>>>>             for multiple partitions
>>>>>
>>>>>             Caution: This message originated from an External
>>>>>             Source. Use proper caution
>>>>>             when opening attachments, clicking links, or responding.
>>>>>
>>>>>
>>>>>             Am 19.08.24 um 19:59 schrieb Jonathan Kim:
>>>>>
>>>>>                 Currently multiple partitions will incorrectly
>>>>>                 overwrite the VM lookup
>>>>>                 table since the table is indexed by PASID and
>>>>>                 multiple partitions can
>>>>>                 register different VM objects on the same PASID.
>>>>>
>>>>>             That's a rather bad idea. Why do we have the same
>>>>>             PASID for different VM
>>>>>             objects in the first place?
>>>>>
>>>>>         Alex can probably elaborate on the KGD side, but from what
>>>>>         I can see, the KMS driver open call has always assigned a
>>>>>         new VM object per PASID on an open call.
>>>>>         The KFD acquires and replaces the KGD PASID-VMID
>>>>>         registration on its own compute process open/creation call.
>>>>>         If this is the bad_idea you're referring to, then someone
>>>>>         else will have to chime in.  I don't have much history on
>>>>>         this unfortunately.
>>>>>
>>>>>
>>>>>     Yeah, Felix and I designed that.
>>>>>
>>>>> The app opens the drm node to create a vm for each partition, with a 
>>>>> different vm->pasid for each vm. The issue comes from 
>>>>> kfd_ioctl_acquire_vm -> kfd_process_device_init_vm -> 
>>>>> amdgpu_amdkfd_gpuvm_set_vm_pasid, which replaces every vm->pasid with 
>>>>> the kfd process->pasid obtained when opening the kfd node. This ends 
>>>>> up storing only one vm in adev->vm_manager.pasids under the KFD 
>>>>> process pasid, so we cannot retrieve the correct vm from 
>>>>> adev->vm_manager.pasids on mGPUs or multiple partitions.
>>>>>
>>>>>
>>>>>
>>>>>         That aside, the current problem is that all KFD device
>>>>>         structures are logical partitions and register their
>>>>>         PASID-VM binding using this concept of a device.
>>>>>
>>>>>
>>>>>     As far as I can see that is the fundamental problem. This
>>>>>     needs to be fixed instead.
>>>>>
>>>>>
>>>>>         On the KGD side however, the registration table is
>>>>>         maintained in the adev struct, which is a physical socket.
>>>>>         So there's a mismatch in understanding of what a device is
>>>>>         between the KFD & KGD with regard to the look up table
>>>>>         that results in bad bindings.
>>>>>
>>>>>         Adding a per-partition dimension to the existing lookup
>>>>>         table resolves issues we're seeing, for example, with
>>>>>         memory violation interception and XNACK, i.e. bad bindings
>>>>>         result in the wrong vm object being found when setting
>>>>>         no-retry flags on memory violations.
>>>>>
>>>>> svm_range_restore_pages retry fault recover uses fault pasid to 
>>>>> get kfd process, and use the fault node_id to get pdd->vm, maybe 
>>>>> you can use this way to fix the debugger issue.
>>>>>
>>>>> Regards,
>>>>>
>>>>> Philip
>>>>>
>>>>>
>>>>>     Yeah that is pretty much a no-go.
>>>>>
>>>>>     The PASID and how it is used is defined by the PCIe
>>>>>     specifications. If we now start to assign multiple VMs to the
>>>>>     same PASID then we are violating the PCIe specification.
>>>>>
>>>>>     The problems you see are most likely just the tip of the
>>>>>     iceberg here.
>>>>>
>>>>>     Regards,
>>>>>     Christian.
>>>>>
>>>>>
>>>>>
>>>>>         Jon
>>>>>
>>>>>
>>>>>             Regards,
>>>>>             Christian.
>>>>>
>>>>>
>>>>>                 This results in loading the wrong VM object on
>>>>>                 PASID query.
>>>>>
>>>>>                 To correct this, setup the lookup table to be
>>>>>                 per-partition-per-PASID
>>>>>                 instead.
>>>>>
>>>>>                 Signed-off-by: Jonathan Kim <jonathan.kim@amd.com>
>>>>>                 <mailto:jonathan.kim@amd.com>
>>>>>                 ---
>>>>>                 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 12 ++++
>>>>>                 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |  1 +
>>>>>                 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c |  4 +-
>>>>>                 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |  7 ++-
>>>>>                 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 55
>>>>>                 +++++++++++------
>>>>>
>>>>>             -- 
>>>>>
>>>>>                 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 11 +++-
>>>>>                 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c |  5 +-
>>>>>                 drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c |  5 +-
>>>>>                 drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c |  5 +-
>>>>>                 drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c |  3 +-
>>>>>                 drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c |  5 +-
>>>>>                 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 16 ++----
>>>>>                 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c |  2 +-
>>>>>                 drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c |  4 +-
>>>>>                 drivers/gpu/drm/amd/amdkfd/kfd_events.c |  3 +-
>>>>>                 .../gpu/drm/amd/amdkfd/kfd_int_process_v10.c |  8 +--
>>>>>                 .../gpu/drm/amd/amdkfd/kfd_int_process_v9.c |  8 +--
>>>>>                 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c |  3 +-
>>>>>                    18 files changed, 92 insertions(+), 65
>>>>>                 deletions(-)
>>>>>
>>>>>                 diff --git
>>>>>                 a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>>>>
>>>>>             b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>>>>
>>>>>                 index c272461d70a9..28db789610e1 100644
>>>>>                 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>>>>                 +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>>>>>                 @@ -887,3 +887,15 @@ int
>>>>>                 amdgpu_amdkfd_unmap_hiq(struct
>>>>>
>>>>>             amdgpu_device *adev, u32 doorbell_off,
>>>>>
>>>>>                        return r;
>>>>>                    }
>>>>>                 +
>>>>>                 +int amdgpu_amdkfd_node_id_to_xcc_id(struct
>>>>>                 amdgpu_device *adev,
>>>>>
>>>>>             uint32_t node_id)
>>>>>
>>>>>                 +{
>>>>>                 +     if (adev->gfx.funcs->ih_node_to_logical_xcc) {
>>>>>                 +             int xcc_id =
>>>>>                 adev->gfx.funcs->ih_node_to_logical_xcc(adev,
>>>>>                 node_id);
>>>>>                 +
>>>>>                 +             if (xcc_id >= 0)
>>>>>                 +                     return xcc_id;
>>>>>                 +     }
>>>>>                 +
>>>>>                 +     return 0;
>>>>>                 +}
>>>>>                 diff --git
>>>>>                 a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>>>>
>>>>>             b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>>>>
>>>>>                 index 4ed49265c764..bf8bb45d8ab6 100644
>>>>>                 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>>>>                 +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>>>>                 @@ -356,6 +356,7 @@ void
>>>>>                 amdgpu_amdkfd_unreserve_mem_limit(struct
>>>>>
>>>>>             amdgpu_device *adev,
>>>>>
>>>>>                 uint64_t size, u32 alloc_flag, int8_t xcp_id);
>>>>>
>>>>>                    u64 amdgpu_amdkfd_xcp_memory_size(struct
>>>>>                 amdgpu_device *adev, int
>>>>>
>>>>>             xcp_id);
>>>>>
>>>>>                 +int amdgpu_amdkfd_node_id_to_xcc_id(struct
>>>>>                 amdgpu_device *adev,
>>>>>
>>>>>             uint32_t node_id);
>>>>>
>>>>>                    #define KFD_XCP_MEM_ID(adev, xcp_id) \
>>>>>                                ((adev)->xcp_mgr && (xcp_id) >= 0 ?\
>>>>>                 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>>>>>
>>>>>             b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>>>>>
>>>>>                 index c6a1783fc9ef..bf9f8802e18d 100644
>>>>>                 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>>>>>                 +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>>>>>                 @@ -37,7 +37,7 @@ static enum drm_gpu_sched_stat
>>>>>
>>>>>             amdgpu_job_timedout(struct drm_sched_job *s_job)
>>>>>
>>>>>                        struct amdgpu_job *job = to_amdgpu_job(s_job);
>>>>>                        struct amdgpu_task_info *ti;
>>>>>                        struct amdgpu_device *adev = ring->adev;
>>>>>                 -     int idx;
>>>>>                 +     int idx, xcp_id = !job->vm ? 0 :
>>>>>                 job->vm->xcp_id;
>>>>>                        int r;
>>>>>
>>>>>                        if (!drm_dev_enter(adev_to_drm(adev), &idx)) {
>>>>>                 @@ -62,7 +62,7 @@ static enum drm_gpu_sched_stat
>>>>>
>>>>>             amdgpu_job_timedout(struct drm_sched_job *s_job)
>>>>>
>>>>>                 job->base.sched->name,
>>>>>                 atomic_read(&ring->fence_drv.last_seq),
>>>>>                 ring->fence_drv.sync_seq);
>>>>>
>>>>>                 -     ti =
>>>>>                 amdgpu_vm_get_task_info_pasid(ring->adev,
>>>>>                 job->pasid);
>>>>>                 +     ti =
>>>>>                 amdgpu_vm_get_task_info_pasid(ring->adev,
>>>>>                 job->pasid, xcp_id);
>>>>>                        if (ti) {
>>>>>                                dev_err(adev->dev,
>>>>>                                        "Process information:
>>>>>                 process %s pid %d thread %s pid %d\n",
>>>>>                 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>>>>
>>>>>             b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>>>>
>>>>>                 index d9fde38f6ee2..e413bf4a3e84 100644
>>>>>                 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>>>>                 +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>>>>                 @@ -1275,17 +1275,20 @@ int
>>>>>                 amdgpu_info_ioctl(struct drm_device *dev,
>>>>>
>>>>>             void *data, struct drm_file *filp)
>>>>>
>>>>>                 struct amdgpu_vm *vm = &fpriv->vm;
>>>>>                                struct drm_amdgpu_info_gpuvm_fault
>>>>>                 gpuvm_fault;
>>>>>                                unsigned long flags;
>>>>>                 +             int i;
>>>>>
>>>>>                                if (!vm)
>>>>>                                        return -EINVAL;
>>>>>
>>>>>                 memset(&gpuvm_fault, 0, sizeof(gpuvm_fault));
>>>>>
>>>>>                 - xa_lock_irqsave(&adev->vm_manager.pasids, flags);
>>>>>                 +             for (i = 0; i <
>>>>>                 adev->xcp_mgr->num_xcps; i++)
>>>>>                 + xa_lock_irqsave(&adev->vm_manager.pasids[i],
>>>>>                 flags);
>>>>>                                gpuvm_fault.addr =
>>>>>                 vm->fault_info.addr;
>>>>>                                gpuvm_fault.status =
>>>>>                 vm->fault_info.status;
>>>>>                                gpuvm_fault.vmhub =
>>>>>                 vm->fault_info.vmhub;
>>>>>                 - xa_unlock_irqrestore(&adev->vm_manager.pasids,
>>>>>                 flags);
>>>>>                 +             for (i = 0; i <
>>>>>                 adev->xcp_mgr->num_xcps; i++)
>>>>>                 +
>>>>>                 xa_unlock_irqrestore(&adev->vm_manager.pasids[i],
>>>>>                 flags);
>>>>>
>>>>>                                return copy_to_user(out, &gpuvm_fault,
>>>>>                 min((size_t)size, sizeof(gpuvm_fault))) ? -EFAULT
>>>>>                 : 0;
>>>>>                 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>
>>>>>             b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>
>>>>>                 index bcb729094521..f43e1c15f423 100644
>>>>>                 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>                 +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>                 @@ -146,7 +146,7 @@ int amdgpu_vm_set_pasid(struct
>>>>>                 amdgpu_device
>>>>>
>>>>>             *adev, struct amdgpu_vm *vm,
>>>>>
>>>>>                 return 0;
>>>>>
>>>>>                        if (vm->pasid) {
>>>>>                 -             r =
>>>>>                 xa_err(xa_erase_irq(&adev->vm_manager.pasids,
>>>>>                 vm->pasid));
>>>>>                 +             r =
>>>>>                 xa_err(xa_erase_irq(&adev->vm_manager.pasids[vm->xcp_id],
>>>>>
>>>>>
>>>>>             vm->pasid));
>>>>>
>>>>>                                if (r < 0)
>>>>>                                        return r;
>>>>>
>>>>>                 @@ -154,7 +154,7 @@ int amdgpu_vm_set_pasid(struct
>>>>>                 amdgpu_device
>>>>>
>>>>>             *adev, struct amdgpu_vm *vm,
>>>>>
>>>>>                        }
>>>>>
>>>>>                        if (pasid) {
>>>>>                 -             r =
>>>>>                 xa_err(xa_store_irq(&adev->vm_manager.pasids,
>>>>>                 pasid, vm,
>>>>>                 +             r =
>>>>>                 xa_err(xa_store_irq(&adev->vm_manager.pasids[vm->xcp_id],
>>>>>
>>>>>
>>>>>             pasid, vm,
>>>>>
>>>>>                 GFP_KERNEL));
>>>>>                                if (r < 0)
>>>>>                                        return r;
>>>>>                 @@ -2288,14 +2288,14 @@ static void
>>>>>
>>>>>             amdgpu_vm_destroy_task_info(struct kref *kref)
>>>>>
>>>>>                    }
>>>>>
>>>>>                    static inline struct amdgpu_vm *
>>>>>                 -amdgpu_vm_get_vm_from_pasid(struct amdgpu_device
>>>>>                 *adev, u32 pasid)
>>>>>                 +amdgpu_vm_get_vm_from_pasid(struct amdgpu_device
>>>>>                 *adev, u32 pasid,
>>>>>
>>>>>             u32 xcp_id)
>>>>>
>>>>>                    {
>>>>>                        struct amdgpu_vm *vm;
>>>>>                        unsigned long flags;
>>>>>
>>>>>                 - xa_lock_irqsave(&adev->vm_manager.pasids, flags);
>>>>>                 -     vm = xa_load(&adev->vm_manager.pasids, pasid);
>>>>>                 - xa_unlock_irqrestore(&adev->vm_manager.pasids,
>>>>>                 flags);
>>>>>                 +
>>>>>                 xa_lock_irqsave(&adev->vm_manager.pasids[xcp_id],
>>>>>                 flags);
>>>>>                 +     vm =
>>>>>                 xa_load(&adev->vm_manager.pasids[xcp_id], pasid);
>>>>>                 +
>>>>>                 xa_unlock_irqrestore(&adev->vm_manager.pasids[xcp_id],
>>>>>                 flags);
>>>>>
>>>>>                        return vm;
>>>>>                    }
>>>>>                 @@ -2343,10 +2343,10 @@
>>>>>                 amdgpu_vm_get_task_info_vm(struct
>>>>>
>>>>>             amdgpu_vm *vm)
>>>>>
>>>>>                     * referenced down with amdgpu_vm_put_task_info.
>>>>>                     */
>>>>>                    struct amdgpu_task_info *
>>>>>                 -amdgpu_vm_get_task_info_pasid(struct
>>>>>                 amdgpu_device *adev, u32 pasid)
>>>>>                 +amdgpu_vm_get_task_info_pasid(struct
>>>>>                 amdgpu_device *adev, u32 pasid,
>>>>>
>>>>>             u32 xcp_id)
>>>>>
>>>>>                    {
>>>>>                        return amdgpu_vm_get_task_info_vm(
>>>>>                 - amdgpu_vm_get_vm_from_pasid(adev, pasid));
>>>>>                 + amdgpu_vm_get_vm_from_pasid(adev, pasid, xcp_id));
>>>>>                    }
>>>>>
>>>>>                    static int amdgpu_vm_create_task_info(struct
>>>>>                 amdgpu_vm *vm)
>>>>>                 @@ -2481,6 +2481,8 @@ int amdgpu_vm_init(struct
>>>>>                 amdgpu_device
>>>>>
>>>>>             *adev, struct amdgpu_vm *vm,
>>>>>
>>>>>                 amdgpu_bo_unreserve(vm->root.bo);
>>>>>                        amdgpu_bo_unref(&root_bo);
>>>>>
>>>>>                 +     vm->xcp_id = xcp_id < 0 ? 0 : xcp_id;
>>>>>                 +
>>>>>                        return 0;
>>>>>
>>>>>                    error_free_root:
>>>>>                 @@ -2695,8 +2697,8 @@ void
>>>>>                 amdgpu_vm_manager_init(struct
>>>>>
>>>>>             amdgpu_device *adev)
>>>>>
>>>>>                    #else
>>>>>                 adev->vm_manager.vm_update_mode = 0;
>>>>>                    #endif
>>>>>                 -
>>>>>                 - xa_init_flags(&adev->vm_manager.pasids,
>>>>>                 XA_FLAGS_LOCK_IRQ);
>>>>>                 +     for (i = 0; i < MAX_XCP; i++)
>>>>>                 + xa_init_flags(&(adev->vm_manager.pasids[i]),
>>>>>
>>>>>             XA_FLAGS_LOCK_IRQ);
>>>>>
>>>>>                    }
>>>>>
>>>>>                    /**
>>>>>                 @@ -2708,10 +2710,15 @@ void
>>>>>                 amdgpu_vm_manager_init(struct
>>>>>
>>>>>             amdgpu_device *adev)
>>>>>
>>>>>                     */
>>>>>                    void amdgpu_vm_manager_fini(struct
>>>>>                 amdgpu_device *adev)
>>>>>                    {
>>>>>                 - WARN_ON(!xa_empty(&adev->vm_manager.pasids));
>>>>>                 - xa_destroy(&adev->vm_manager.pasids);
>>>>>                 +     int i;
>>>>>                 +
>>>>>                 +     for (i = 0; i < MAX_XCP; i++) {
>>>>>                 + WARN_ON(!xa_empty(&adev->vm_manager.pasids[i]));
>>>>>                 + xa_destroy(&adev->vm_manager.pasids[i]);
>>>>>                 +     }
>>>>>
>>>>>                        amdgpu_vmid_mgr_fini(adev);
>>>>>                 +
>>>>>                    }
>>>>>
>>>>>                    /**
>>>>>                 @@ -2778,17 +2785,18 @@ bool
>>>>>                 amdgpu_vm_handle_fault(struct
>>>>>
>>>>>             amdgpu_device *adev, u32 pasid,
>>>>>
>>>>>                        unsigned long irqflags;
>>>>>                        uint64_t value, flags;
>>>>>                        struct amdgpu_vm *vm;
>>>>>                 -     int r;
>>>>>                 +     int r, xcp_id;
>>>>>
>>>>>                 - xa_lock_irqsave(&adev->vm_manager.pasids,
>>>>>                 irqflags);
>>>>>                 -     vm = xa_load(&adev->vm_manager.pasids, pasid);
>>>>>                 +     xcp_id =
>>>>>                 amdgpu_amdkfd_node_id_to_xcc_id(adev, node_id)/adev-
>>>>>                 gfx.num_xcc_per_xcp;
>>>>>                 +
>>>>>                 xa_lock_irqsave(&adev->vm_manager.pasids[xcp_id],
>>>>>                 irqflags);
>>>>>                 +     vm =
>>>>>                 xa_load(&adev->vm_manager.pasids[xcp_id], pasid);
>>>>>                        if (vm) {
>>>>>                                root = amdgpu_bo_ref(vm->root.bo);
>>>>>                                is_compute_context =
>>>>>                 vm->is_compute_context;
>>>>>                        } else {
>>>>>                                root = NULL;
>>>>>                        }
>>>>>                 - xa_unlock_irqrestore(&adev->vm_manager.pasids,
>>>>>                 irqflags);
>>>>>                 +
>>>>>                 xa_unlock_irqrestore(&adev->vm_manager.pasids[xcp_id],
>>>>>                 irqflags);
>>>>>
>>>>>                        if (!root)
>>>>>                                return false;
>>>>>                 @@ -2806,11 +2814,11 @@ bool
>>>>>                 amdgpu_vm_handle_fault(struct
>>>>>
>>>>>             amdgpu_device *adev, u32 pasid,
>>>>>
>>>>>                                goto error_unref;
>>>>>
>>>>>                        /* Double check that the VM still exists */
>>>>>                 - xa_lock_irqsave(&adev->vm_manager.pasids,
>>>>>                 irqflags);
>>>>>                 -     vm = xa_load(&adev->vm_manager.pasids, pasid);
>>>>>                 +
>>>>>                 xa_lock_irqsave(&adev->vm_manager.pasids[xcp_id],
>>>>>                 irqflags);
>>>>>                 +     vm =
>>>>>                 xa_load(&adev->vm_manager.pasids[xcp_id], pasid);
>>>>>                        if (vm && vm->root.bo != root)
>>>>>                                vm = NULL;
>>>>>                 - xa_unlock_irqrestore(&adev->vm_manager.pasids,
>>>>>                 irqflags);
>>>>>                 +
>>>>>                 xa_unlock_irqrestore(&adev->vm_manager.pasids[xcp_id],
>>>>>                 irqflags);
>>>>>                        if (!vm)
>>>>>                                goto error_unlock;
>>>>>
>>>>>                 @@ -2968,14 +2976,15 @@ void
>>>>>                 amdgpu_vm_update_fault_cache(struct
>>>>>
>>>>>             amdgpu_device *adev,
>>>>>
>>>>>                 unsigned int pasid,
>>>>>                 uint64_t addr,
>>>>>                 uint32_t status,
>>>>>                 - unsigned int vmhub)
>>>>>                 + unsigned int vmhub,
>>>>>                 + uint32_t xcp_id)
>>>>>                    {
>>>>>                        struct amdgpu_vm *vm;
>>>>>                        unsigned long flags;
>>>>>
>>>>>                 - xa_lock_irqsave(&adev->vm_manager.pasids, flags);
>>>>>                 +
>>>>>                 xa_lock_irqsave(&adev->vm_manager.pasids[xcp_id],
>>>>>                 flags);
>>>>>
>>>>>                 -     vm = xa_load(&adev->vm_manager.pasids, pasid);
>>>>>                 +     vm =
>>>>>                 xa_load(&adev->vm_manager.pasids[xcp_id], pasid);
>>>>>                        /* Don't update the fault cache if status
>>>>>                 is 0.  In the multiple
>>>>>                         * fault case, subsequent faults will
>>>>>                 return a 0 status which is
>>>>>                         * useless for userspace and replaces the
>>>>>                 useful fault status, so
>>>>>                 @@ -3008,7 +3017,7 @@ void
>>>>>                 amdgpu_vm_update_fault_cache(struct
>>>>>
>>>>>             amdgpu_device *adev,
>>>>>
>>>>>                 WARN_ONCE(1, "Invalid vmhub %u\n", vmhub);
>>>>>                                }
>>>>>                        }
>>>>>                 - xa_unlock_irqrestore(&adev->vm_manager.pasids,
>>>>>                 flags);
>>>>>                 +
>>>>>                 xa_unlock_irqrestore(&adev->vm_manager.pasids[xcp_id],
>>>>>                 flags);
>>>>>                    }
>>>>>
>>>>>                    /**
>>>>>                 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>>
>>>>>             b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>>
>>>>>                 index 046949c4b695..1499f5f731e9 100644
>>>>>                 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>>                 +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>>                 @@ -35,6 +35,7 @@
>>>>>                    #include "amdgpu_sync.h"
>>>>>                    #include "amdgpu_ring.h"
>>>>>                    #include "amdgpu_ids.h"
>>>>>                 +#include "amdgpu_xcp.h"
>>>>>
>>>>>                    struct drm_exec;
>>>>>
>>>>>                 @@ -418,6 +419,9 @@ struct amdgpu_vm {
>>>>>
>>>>>                        /* cached fault info */
>>>>>                        struct amdgpu_vm_fault_info fault_info;
>>>>>                 +
>>>>>                 +     /* XCP ID */
>>>>>                 +     int xcp_id;
>>>>>                    };
>>>>>
>>>>>                    struct amdgpu_vm_manager {
>>>>>                 @@ -456,7 +460,7 @@ struct amdgpu_vm_manager {
>>>>>                        /* PASID to VM mapping, will be used in
>>>>>                 interrupt context to
>>>>>                         * look up VM of a page fault
>>>>>                         */
>>>>>                 -     struct xarray pasids;
>>>>>                 +     struct xarray pasids[MAX_XCP];
>>>>>                        /* Global registration of recent page fault
>>>>>                 information */
>>>>>                        struct amdgpu_vm_fault_info fault_info;
>>>>>                    };
>>>>>                 @@ -550,7 +554,7 @@ bool
>>>>>                 amdgpu_vm_need_pipeline_sync(struct
>>>>>
>>>>>             amdgpu_ring *ring,
>>>>>
>>>>>                    void amdgpu_vm_check_compute_bug(struct
>>>>>                 amdgpu_device *adev);
>>>>>
>>>>>                    struct amdgpu_task_info *
>>>>>                 -amdgpu_vm_get_task_info_pasid(struct
>>>>>                 amdgpu_device *adev, u32 pasid);
>>>>>                 +amdgpu_vm_get_task_info_pasid(struct
>>>>>                 amdgpu_device *adev, u32 pasid,
>>>>>
>>>>>             u32 xcp_id);
>>>>>
>>>>>                    struct amdgpu_task_info *
>>>>>                    amdgpu_vm_get_task_info_vm(struct amdgpu_vm *vm);
>>>>>                 @@ -649,7 +653,8 @@ void
>>>>>                 amdgpu_vm_update_fault_cache(struct
>>>>>
>>>>>             amdgpu_device *adev,
>>>>>
>>>>>                 unsigned int pasid,
>>>>>                 uint64_t addr,
>>>>>                 uint32_t status,
>>>>>                 - unsigned int vmhub);
>>>>>                 + unsigned int vmhub,
>>>>>                 + uint32_t xcp_id);
>>>>>                    void amdgpu_vm_tlb_fence_create(struct
>>>>>                 amdgpu_device *adev,
>>>>>                                                 struct amdgpu_vm *vm,
>>>>>                                                 struct dma_fence
>>>>>                 **fence);
>>>>>                 diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>>>>>
>>>>>             b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>>>>>
>>>>>                 index f0ceab3ce5bf..24b042febf5c 100644
>>>>>                 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>>>>>                 +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>>>>>                 @@ -151,7 +151,8 @@ static int
>>>>>                 gmc_v10_0_process_interrupt(struct
>>>>>
>>>>>             amdgpu_device *adev,
>>>>>
>>>>>                 WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
>>>>>
>>>>>                 amdgpu_vm_update_fault_cache(adev, entry->pasid,
>>>>>                 addr, status,
>>>>>                 -                                         
>>>>>                 entry->vmid_src ? AMDGPU_MMHUB0(0) :
>>>>>
>>>>>             AMDGPU_GFXHUB(0));
>>>>>
>>>>>                 + entry->vmid_src ? AMDGPU_MMHUB0(0) :
>>>>>
>>>>>             AMDGPU_GFXHUB(0),
>>>>>
>>>>>                 + 0);
>>>>>                        }
>>>>>
>>>>>                        if (!printk_ratelimit())
>>>>>                 @@ -161,7 +162,7 @@ static int
>>>>>                 gmc_v10_0_process_interrupt(struct
>>>>>
>>>>>             amdgpu_device *adev,
>>>>>
>>>>>                 "[%s] page fault (src_id:%u ring:%u vmid:%u
>>>>>                 pasid:%u)\n",
>>>>>                                entry->vmid_src ? "mmhub" : "gfxhub",
>>>>>                                entry->src_id, entry->ring_id,
>>>>>                 entry->vmid, entry->pasid);
>>>>>                 -     task_info =
>>>>>                 amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
>>>>>                 +     task_info =
>>>>>                 amdgpu_vm_get_task_info_pasid(adev, entry->pasid, 0);
>>>>>                        if (task_info) {
>>>>>                                dev_err(adev->dev,
>>>>>                                        " in process %s pid %d
>>>>>                 thread %s pid %d\n",
>>>>>                 diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
>>>>>
>>>>>             b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
>>>>>
>>>>>                 index 2797fd84432b..3507046d33e6 100644
>>>>>                 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
>>>>>                 +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
>>>>>                 @@ -122,7 +122,8 @@ static int
>>>>>                 gmc_v11_0_process_interrupt(struct
>>>>>
>>>>>             amdgpu_device *adev,
>>>>>
>>>>>                 WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
>>>>>
>>>>>                 amdgpu_vm_update_fault_cache(adev, entry->pasid,
>>>>>                 addr, status,
>>>>>                 -                                         
>>>>>                 entry->vmid_src ? AMDGPU_MMHUB0(0) :
>>>>>
>>>>>             AMDGPU_GFXHUB(0));
>>>>>
>>>>>                 + entry->vmid_src ? AMDGPU_MMHUB0(0) :
>>>>>
>>>>>             AMDGPU_GFXHUB(0),
>>>>>
>>>>>                 + 0);
>>>>>                        }
>>>>>
>>>>>                        if (printk_ratelimit()) {
>>>>>                 @@ -132,7 +133,7 @@ static int
>>>>>                 gmc_v11_0_process_interrupt(struct
>>>>>
>>>>>             amdgpu_device *adev,
>>>>>
>>>>>                 "[%s] page fault (src_id:%u ring:%u vmid:%u
>>>>>                 pasid:%u)\n",
>>>>>                 entry->vmid_src ? "mmhub" : "gfxhub",
>>>>>                 entry->src_id, entry->ring_id, entry->vmid,
>>>>>                 entry->pasid);
>>>>>                 -             task_info =
>>>>>                 amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
>>>>>                 +             task_info =
>>>>>                 amdgpu_vm_get_task_info_pasid(adev, entry->pasid, 0);
>>>>>                                if (task_info) {
>>>>>                 dev_err(adev->dev,
>>>>>                                                " in process %s pid
>>>>>                 %d thread %s pid %d)\n",
>>>>>                 diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
>>>>>
>>>>>             b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
>>>>>
>>>>>                 index 60acf676000b..9844564c6c74 100644
>>>>>                 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
>>>>>                 +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
>>>>>                 @@ -115,7 +115,8 @@ static int
>>>>>                 gmc_v12_0_process_interrupt(struct
>>>>>
>>>>>             amdgpu_device *adev,
>>>>>
>>>>>                 WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
>>>>>
>>>>>                 amdgpu_vm_update_fault_cache(adev, entry->pasid,
>>>>>                 addr, status,
>>>>>                 -                                         
>>>>>                 entry->vmid_src ? AMDGPU_MMHUB0(0) :
>>>>>
>>>>>             AMDGPU_GFXHUB(0));
>>>>>
>>>>>                 + entry->vmid_src ? AMDGPU_MMHUB0(0) :
>>>>>
>>>>>             AMDGPU_GFXHUB(0),
>>>>>
>>>>>                 + 0);
>>>>>                        }
>>>>>
>>>>>                        if (printk_ratelimit()) {
>>>>>                 @@ -125,7 +126,7 @@ static int
>>>>>                 gmc_v12_0_process_interrupt(struct
>>>>>
>>>>>             amdgpu_device *adev,
>>>>>
>>>>>                 "[%s] page fault (src_id:%u ring:%u vmid:%u
>>>>>                 pasid:%u)\n",
>>>>>                 entry->vmid_src ? "mmhub" : "gfxhub",
>>>>>                 entry->src_id, entry->ring_id, entry->vmid,
>>>>>                 entry->pasid);
>>>>>                 -             task_info =
>>>>>                 amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
>>>>>                 +             task_info =
>>>>>                 amdgpu_vm_get_task_info_pasid(adev, entry->pasid, 0);
>>>>>                                if (task_info) {
>>>>>                 dev_err(adev->dev,
>>>>>                                                " in process %s pid
>>>>>                 %d thread %s pid %d)\n",
>>>>>                 diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>>>>>
>>>>>             b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>>>>>
>>>>>                 index 994432fb57ea..2cdb0cbb7c4d 100644
>>>>>                 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>>>>>                 +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>>>>>                 @@ -1268,7 +1268,8 @@ static int
>>>>>                 gmc_v7_0_process_interrupt(struct
>>>>>
>>>>>             amdgpu_device *adev,
>>>>>
>>>>>                 return 0;
>>>>>
>>>>>                 amdgpu_vm_update_fault_cache(adev, entry->pasid,
>>>>>                 - ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT, status,
>>>>>
>>>>>             AMDGPU_GFXHUB(0));
>>>>>
>>>>>                 + ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT, status,
>>>>>                 + AMDGPU_GFXHUB(0), 0);
>>>>>
>>>>>                        if (amdgpu_vm_fault_stop ==
>>>>>                 AMDGPU_VM_FAULT_STOP_FIRST)
>>>>>                 gmc_v7_0_set_fault_enable_default(adev, false);
>>>>>                 diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>>>>>
>>>>>             b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>>>>>
>>>>>                 index 86488c052f82..6855caeb7f74 100644
>>>>>                 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>>>>>                 +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>>>>>                 @@ -1437,7 +1437,8 @@ static int
>>>>>                 gmc_v8_0_process_interrupt(struct
>>>>>
>>>>>             amdgpu_device *adev,
>>>>>
>>>>>                 return 0;
>>>>>
>>>>>                 amdgpu_vm_update_fault_cache(adev, entry->pasid,
>>>>>                 - ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT, status,
>>>>>
>>>>>             AMDGPU_GFXHUB(0));
>>>>>
>>>>>                 + ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT, status,
>>>>>                 + AMDGPU_GFXHUB(0), 0);
>>>>>
>>>>>                        if (amdgpu_vm_fault_stop ==
>>>>>                 AMDGPU_VM_FAULT_STOP_FIRST)
>>>>>                 gmc_v8_0_set_fault_enable_default(adev, false);
>>>>>                 @@ -1448,7 +1449,7 @@ static int
>>>>>                 gmc_v8_0_process_interrupt(struct
>>>>>
>>>>>             amdgpu_device *adev,
>>>>>
>>>>>                 dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n",
>>>>>                 entry->src_id, entry->src_data[0]);
>>>>>
>>>>>                 -             task_info =
>>>>>                 amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
>>>>>                 +             task_info =
>>>>>                 amdgpu_vm_get_task_info_pasid(adev, entry->pasid, 0);
>>>>>                                if (task_info) {
>>>>>                 dev_err(adev->dev, " for process %s pid %d thread
>>>>>                 %s pid %d\n",
>>>>>                 task_info->process_name, task_info->tgid,
>>>>>                 diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>>>>
>>>>>             b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>>>>
>>>>>                 index b73136d390cc..e183e08b2c02 100644
>>>>>                 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>>>>                 +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>>>>                 @@ -556,10 +556,12 @@ static int
>>>>>                 gmc_v9_0_process_interrupt(struct
>>>>>
>>>>>             amdgpu_device *adev,
>>>>>
>>>>>                        unsigned int vmhub;
>>>>>                        u64 addr;
>>>>>                        uint32_t cam_index = 0;
>>>>>                 -     int ret, xcc_id = 0;
>>>>>                 -     uint32_t node_id;
>>>>>                 +     int ret;
>>>>>                 +     uint32_t node_id, xcc_id, xcp_id;
>>>>>
>>>>>                        node_id = entry->node_id;
>>>>>                 +     xcc_id =
>>>>>                 amdgpu_amdkfd_node_id_to_xcc_id(adev, node_id);
>>>>>                 +     xcp_id = xcc_id/adev->gfx.num_xcc_per_xcp;
>>>>>
>>>>>                        addr = (u64)entry->src_data[0] << 12;
>>>>>                        addr |= ((u64)entry->src_data[1] & 0xf) << 44;
>>>>>                 @@ -572,12 +574,6 @@ static int
>>>>>                 gmc_v9_0_process_interrupt(struct
>>>>>
>>>>>             amdgpu_device *adev,
>>>>>
>>>>>                 vmhub = AMDGPU_MMHUB1(0);
>>>>>                        } else {
>>>>>                                hub_name = "gfxhub0";
>>>>>                 -             if
>>>>>                 (adev->gfx.funcs->ih_node_to_logical_xcc) {
>>>>>                 -                     xcc_id =
>>>>>                 adev->gfx.funcs->ih_node_to_logical_xcc(adev,
>>>>>                 - node_id);
>>>>>                 -                     if (xcc_id < 0)
>>>>>                 -                             xcc_id = 0;
>>>>>                 -             }
>>>>>                                vmhub = xcc_id;
>>>>>                        }
>>>>>                        hub = &adev->vmhub[vmhub];
>>>>>                 @@ -631,7 +627,7 @@ static int
>>>>>                 gmc_v9_0_process_interrupt(struct
>>>>>
>>>>>             amdgpu_device *adev,
>>>>>
>>>>>                 retry_fault ? "retry" : "no-retry",
>>>>>                                entry->src_id, entry->ring_id,
>>>>>                 entry->vmid, entry->pasid);
>>>>>
>>>>>                 -     task_info =
>>>>>                 amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
>>>>>                 +     task_info =
>>>>>                 amdgpu_vm_get_task_info_pasid(adev, entry->pasid,
>>>>>
>>>>>             xcp_id);
>>>>>
>>>>>                        if (task_info) {
>>>>>                                dev_err(adev->dev,
>>>>>                                        " for process %s pid %d
>>>>>                 thread %s pid %d)\n",
>>>>>                 @@ -675,7 +671,7 @@ static int
>>>>>                 gmc_v9_0_process_interrupt(struct
>>>>>
>>>>>             amdgpu_device *adev,
>>>>>
>>>>>                        if (!amdgpu_sriov_vf(adev))
>>>>>                 WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
>>>>>
>>>>>                 - amdgpu_vm_update_fault_cache(adev, entry->pasid,
>>>>>                 addr, status,
>>>>>
>>>>>             vmhub);
>>>>>
>>>>>                 + amdgpu_vm_update_fault_cache(adev, entry->pasid,
>>>>>                 addr, status,
>>>>>
>>>>>             vmhub, xcp_id);
>>>>>
>>>>>                 dev_err(adev->dev,
>>>>>                 "VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
>>>>>                 diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>>>>>
>>>>>             b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>>>>>
>>>>>                 index 23ef4eb36b40..1ac4224bbe5b 100644
>>>>>                 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>>>>>                 +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>>>>>                 @@ -2182,7 +2182,7 @@ static int
>>>>>                 sdma_v4_0_print_iv_entry(struct
>>>>>
>>>>>             amdgpu_device *adev,
>>>>>
>>>>>                 instance, addr, entry->src_id, entry->ring_id,
>>>>>                 entry->vmid,
>>>>>                 entry->pasid);
>>>>>
>>>>>                 -     task_info =
>>>>>                 amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
>>>>>                 +     task_info =
>>>>>                 amdgpu_vm_get_task_info_pasid(adev, entry->pasid, 0);
>>>>>                        if (task_info) {
>>>>>                 dev_dbg_ratelimited(adev->dev,
>>>>>                                                    " for process
>>>>>                 %s pid %d thread %s pid %d\n",
>>>>>                 diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
>>>>>
>>>>>             b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
>>>>>
>>>>>                 index 57f16c09abfc..c8b5c0302ca7 100644
>>>>>                 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
>>>>>                 +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
>>>>>                 @@ -1683,6 +1683,8 @@ static int
>>>>>                 sdma_v4_4_2_print_iv_entry(struct
>>>>>
>>>>>             amdgpu_device *adev,
>>>>>
>>>>>                        int instance;
>>>>>                        struct amdgpu_task_info *task_info;
>>>>>                        u64 addr;
>>>>>                 +     uint32_t xcc_id =
>>>>>                 amdgpu_amdkfd_node_id_to_xcc_id(adev,
>>>>>                 entry->node_id);
>>>>>                 +     uint32_t xcp_id =
>>>>>                 xcc_id/adev->gfx.num_xcc_per_xcp;
>>>>>
>>>>>                        instance = sdma_v4_4_2_irq_id_to_seq(adev,
>>>>>                 entry->client_id);
>>>>>                        if (instance < 0 || instance >=
>>>>>                 adev->sdma.num_instances) {
>>>>>                 @@ -1698,7 +1700,7 @@ static int
>>>>>                 sdma_v4_4_2_print_iv_entry(struct
>>>>>
>>>>>             amdgpu_device *adev,
>>>>>
>>>>>                 instance, addr, entry->src_id, entry->ring_id,
>>>>>                 entry->vmid,
>>>>>                 entry->pasid);
>>>>>
>>>>>                 -     task_info =
>>>>>                 amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
>>>>>                 +     task_info =
>>>>>                 amdgpu_vm_get_task_info_pasid(adev, entry->pasid,
>>>>>
>>>>>             xcp_id);
>>>>>
>>>>>                        if (task_info) {
>>>>>                 dev_dbg_ratelimited(adev->dev, " for process %s
>>>>>                 pid %d thread %s
>>>>>
>>>>>             pid %d\n",
>>>>>
>>>>>                 task_info->process_name, task_info->tgid,
>>>>>                 diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
>>>>>
>>>>>             b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
>>>>>
>>>>>                 index ea3792249209..c098fbaf0e1c 100644
>>>>>                 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
>>>>>                 +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
>>>>>                 @@ -1262,8 +1262,9 @@ void
>>>>>                 kfd_signal_reset_event(struct kfd_node
>>>>>
>>>>>             *dev)
>>>>>
>>>>>                                if (dev->dqm->detect_hang_count) {
>>>>>                                        struct amdgpu_task_info *ti;
>>>>>                 +                     uint32_t xcp_id = dev->xcp ?
>>>>>                 dev->xcp->id : 0;
>>>>>
>>>>>                 -                     ti =
>>>>>                 amdgpu_vm_get_task_info_pasid(dev->adev, p->pasid);
>>>>>                 +                     ti =
>>>>>                 amdgpu_vm_get_task_info_pasid(dev->adev, p->pasid,
>>>>>
>>>>>             xcp_id);
>>>>>
>>>>>                 if (ti) {
>>>>>                 dev_err(dev->adev->dev,
>>>>>                 "Queues reset on process %s tid %d thread %s pid
>>>>>                 %d\n",
>>>>>                 diff --git
>>>>>                 a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
>>>>>
>>>>>             b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
>>>>>
>>>>>                 index 8e0d0356e810..d7cbf9525698 100644
>>>>>                 ---
>>>>>                 a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
>>>>>                 +++
>>>>>                 b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
>>>>>                 @@ -377,12 +377,8 @@ static void
>>>>>                 event_interrupt_wq_v10(struct
>>>>>
>>>>>             kfd_node *dev,
>>>>>
>>>>>                 struct kfd_hsa_memory_exception_data exception_data;
>>>>>
>>>>>                                /* gfxhub */
>>>>>                 -             if (!vmid_type &&
>>>>>                 dev->adev->gfx.funcs->ih_node_to_logical_xcc) {
>>>>>                 -                     hub_inst =
>>>>>                 dev->adev->gfx.funcs->ih_node_to_logical_xcc(
>>>>>                 dev->adev,
>>>>>                 - node_id);
>>>>>                 -                     if (hub_inst < 0)
>>>>>                 -                             hub_inst = 0;
>>>>>                 -             }
>>>>>                 +             if (!vmid_type)
>>>>>                 +                     hub_inst =
>>>>>                 amdgpu_amdkfd_node_id_to_xcc_id(dev->adev,
>>>>>
>>>>>             node_id);
>>>>>
>>>>>                                /* mmhub */
>>>>>                                if (vmid_type && client_id ==
>>>>>                 SOC15_IH_CLIENTID_VMC)
>>>>>                 diff --git
>>>>>                 a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
>>>>>
>>>>>             b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
>>>>>
>>>>>                 index a9c3580be8c9..4708b8c811a5 100644
>>>>>                 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
>>>>>                 +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
>>>>>                 @@ -437,12 +437,8 @@ static void
>>>>>                 event_interrupt_wq_v9(struct
>>>>>
>>>>>             kfd_node *dev,
>>>>>
>>>>>                 struct kfd_hsa_memory_exception_data exception_data;
>>>>>
>>>>>                                /* gfxhub */
>>>>>                 -             if (!vmid_type &&
>>>>>                 dev->adev->gfx.funcs->ih_node_to_logical_xcc) {
>>>>>                 -                     hub_inst =
>>>>>                 dev->adev->gfx.funcs->ih_node_to_logical_xcc(
>>>>>                 dev->adev,
>>>>>                 - node_id);
>>>>>                 -                     if (hub_inst < 0)
>>>>>                 -                             hub_inst = 0;
>>>>>                 -             }
>>>>>                 +             if (!vmid_type)
>>>>>                 +                     hub_inst =
>>>>>                 amdgpu_amdkfd_node_id_to_xcc_id(dev->adev,
>>>>>
>>>>>             node_id);
>>>>>
>>>>>                                /* mmhub */
>>>>>                                if (vmid_type && client_id ==
>>>>>                 SOC15_IH_CLIENTID_VMC)
>>>>>                 diff --git
>>>>>                 a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
>>>>>
>>>>>             b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
>>>>>
>>>>>                 index ea6a8e43bd5b..b5f2f5b1069c 100644
>>>>>                 --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
>>>>>                 +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
>>>>>                 @@ -251,8 +251,9 @@ void
>>>>>
>>>>>             kfd_smi_event_update_thermal_throttling(struct
>>>>>             kfd_node *dev,
>>>>>
>>>>>                    void kfd_smi_event_update_vmfault(struct
>>>>>                 kfd_node *dev, uint16_t pasid)
>>>>>                    {
>>>>>                        struct amdgpu_task_info *task_info;
>>>>>                 +     uint32_t xcp_id = dev->xcp ? dev->xcp->id : 0;
>>>>>
>>>>>                 -     task_info =
>>>>>                 amdgpu_vm_get_task_info_pasid(dev->adev, pasid);
>>>>>                 +     task_info =
>>>>>                 amdgpu_vm_get_task_info_pasid(dev->adev, pasid,
>>>>>                 xcp_id);
>>>>>                        if (task_info) {
>>>>>                                /* Report VM faults from user
>>>>>                 applications, not retry from kernel */
>>>>>                                if (task_info->pid)
>>>>>
>>>
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2024-09-25 19:21 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-08-19 17:59 [PATCH] drm/amdkfd: fix vm-pasid lookup for multiple partitions Jonathan Kim
2024-09-05 14:23 ` Christian König
2024-09-09 16:02   ` Kim, Jonathan
2024-09-09 18:46     ` Christian König
2024-09-10 15:23       ` Philip Yang
2024-09-10 20:47         ` Kim, Jonathan
2024-09-10 23:59           ` Chen, Xiaogang
2024-09-11  6:54             ` Christian König
2024-09-11 13:40               ` Philip Yang
2024-09-11 13:58                 ` Christian König
2024-09-25 19:20                   ` Felix Kuehling

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox