Linux kernel -stable discussions
 help / color / mirror / Atom feed
* [PATCH 0/6] Bug:211277 fix backport for 5.10 stable
@ 2021-12-09 22:09 James Zhu
  2021-12-09 22:09 ` [PATCH 1/6] drm/amd/amdkfd: adjust dummy functions' placement James Zhu
                   ` (6 more replies)
  0 siblings, 7 replies; 14+ messages in thread
From: James Zhu @ 2021-12-09 22:09 UTC (permalink / raw)
  To: stable; +Cc: jzhums, alexander.deucher, kolAflash

These patches are backports for 5.10 stable.
They are cherry-picked from 5.14 stable.

BugFix: https://bugzilla.kernel.org/show_bug.cgi?id=211277

James Zhu (3):
  drm/amdkfd: separate kfd_iommu_resume from kfd_resume
  drm/amdgpu: add amdgpu_amdkfd_resume_iommu
  drm/amdgpu: move iommu_resume before ip init/resume

Lang Yu (1):
  drm/amd/amdkfd: adjust dummy functions' placement

Yifan Zhang (2):
  drm/amdgpu: init iommu after amdkfd device init
  drm/amdkfd: fix boot failure when iommu is disabled in Picasso.

 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c |  97 ++------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 145 ++++++++++++++++++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |   8 ++
 drivers/gpu/drm/amd/amdkfd/kfd_device.c    |  15 ++-
 4 files changed, 155 insertions(+), 110 deletions(-)

-- 
2.25.1


^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 1/6] drm/amd/amdkfd: adjust dummy functions' placement
  2021-12-09 22:09 [PATCH 0/6] Bug:211277 fix backport for 5.10 stable James Zhu
@ 2021-12-09 22:09 ` James Zhu
  2021-12-09 22:09 ` [PATCH 2/6] drm/amdkfd: separate kfd_iommu_resume from kfd_resume James Zhu
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 14+ messages in thread
From: James Zhu @ 2021-12-09 22:09 UTC (permalink / raw)
  To: stable
  Cc: jzhums, alexander.deucher, kolAflash, Lang Yu, Felix Kuehling,
	Huang Rui

From: Lang Yu <Lang.Yu@amd.com>

commit cd63989e0e6aa2eb66b461f2bae769e2550e47ac upstream.

Move all the dummy functions in amdgpu_amdkfd.c to
amdgpu_amdkfd.h as inline functions.

Signed-off-by: Lang Yu <Lang.Yu@amd.com>
Suggested-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: James Zhu <James.Zhu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c |  87 -------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 138 ++++++++++++++++++---
 2 files changed, 119 insertions(+), 106 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 0544460653b9..b23b31dc570e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -47,12 +47,8 @@ int amdgpu_amdkfd_init(void)
 	amdgpu_amdkfd_total_mem_size = si.totalram - si.totalhigh;
 	amdgpu_amdkfd_total_mem_size *= si.mem_unit;
 
-#ifdef CONFIG_HSA_AMD
 	ret = kgd2kfd_init();
 	amdgpu_amdkfd_gpuvm_init_mem_limits();
-#else
-	ret = -ENOENT;
-#endif
 	kfd_initialized = !ret;
 
 	return ret;
@@ -695,86 +691,3 @@ bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd)
 
 	return adev->have_atomics_support;
 }
-
-#ifndef CONFIG_HSA_AMD
-bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
-{
-	return false;
-}
-
-void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
-{
-}
-
-int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo)
-{
-	return 0;
-}
-
-void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
-					struct amdgpu_vm *vm)
-{
-}
-
-struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f)
-{
-	return NULL;
-}
-
-int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm)
-{
-	return 0;
-}
-
-struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev,
-			      unsigned int asic_type, bool vf)
-{
-	return NULL;
-}
-
-bool kgd2kfd_device_init(struct kfd_dev *kfd,
-			 struct drm_device *ddev,
-			 const struct kgd2kfd_shared_resources *gpu_resources)
-{
-	return false;
-}
-
-void kgd2kfd_device_exit(struct kfd_dev *kfd)
-{
-}
-
-void kgd2kfd_exit(void)
-{
-}
-
-void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
-{
-}
-
-int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
-{
-	return 0;
-}
-
-int kgd2kfd_pre_reset(struct kfd_dev *kfd)
-{
-	return 0;
-}
-
-int kgd2kfd_post_reset(struct kfd_dev *kfd)
-{
-	return 0;
-}
-
-void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
-{
-}
-
-void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd)
-{
-}
-
-void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask)
-{
-}
-#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index ea391ca7f2f1..a81d9cacf9b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -94,11 +94,6 @@ enum kgd_engine_type {
 	KGD_ENGINE_MAX
 };
 
-struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
-						       struct mm_struct *mm);
-bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
-struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
-int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo);
 
 struct amdkfd_process_info {
 	/* List head of all VMs that belong to a KFD process */
@@ -132,8 +127,6 @@ void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
 void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
 void amdgpu_amdkfd_device_init(struct amdgpu_device *adev);
 void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev);
-
-int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm);
 int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
 				uint32_t vmid, uint64_t gpu_addr,
 				uint32_t *ib_cmd, uint32_t ib_len);
@@ -153,6 +146,38 @@ void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd);
 int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
 					int queue_bit);
 
+struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
+								struct mm_struct *mm);
+#if IS_ENABLED(CONFIG_HSA_AMD)
+bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
+struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
+int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo);
+int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm);
+#else
+static inline
+bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
+{
+	return false;
+}
+
+static inline
+struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f)
+{
+	return NULL;
+}
+
+static inline
+int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo)
+{
+	return 0;
+}
+
+static inline
+int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm)
+{
+	return 0;
+}
+#endif
 /* Shared API */
 int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
 				void **mem_obj, uint64_t *gpu_addr,
@@ -215,8 +240,6 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
 					struct file *filp, u32 pasid,
 					void **vm, void **process_info,
 					struct dma_fence **ef);
-void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
-				struct amdgpu_vm *vm);
 void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm);
 void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm);
 uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm);
@@ -236,23 +259,43 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
 		struct kgd_mem *mem, void **kptr, uint64_t *size);
 int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
 					    struct dma_fence **ef);
-
 int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
 					      struct kfd_vm_fault_info *info);
-
 int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
 				      struct dma_buf *dmabuf,
 				      uint64_t va, void *vm,
 				      struct kgd_mem **mem, uint64_t *size,
 				      uint64_t *mmap_offset);
-
-void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
-void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo);
-
 int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
 				struct tile_config *config);
+#if IS_ENABLED(CONFIG_HSA_AMD)
+void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
+void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
+				struct amdgpu_vm *vm);
+void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo);
+#else
+static inline
+void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
+{
+}
 
+static inline
+void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
+					struct amdgpu_vm *vm)
+{
+}
+
+static inline
+void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
+{
+}
+#endif
 /* KGD2KFD callbacks */
+int kgd2kfd_quiesce_mm(struct mm_struct *mm);
+int kgd2kfd_resume_mm(struct mm_struct *mm);
+int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
+						struct dma_fence *fence);
+#if IS_ENABLED(CONFIG_HSA_AMD)
 int kgd2kfd_init(void);
 void kgd2kfd_exit(void);
 struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev,
@@ -266,11 +309,68 @@ int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm);
 int kgd2kfd_pre_reset(struct kfd_dev *kfd);
 int kgd2kfd_post_reset(struct kfd_dev *kfd);
 void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry);
-int kgd2kfd_quiesce_mm(struct mm_struct *mm);
-int kgd2kfd_resume_mm(struct mm_struct *mm);
-int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
-					       struct dma_fence *fence);
 void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd);
 void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask);
+#else
+static inline int kgd2kfd_init(void)
+{
+	return -ENOENT;
+}
 
+static inline void kgd2kfd_exit(void)
+{
+}
+
+static inline
+struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev,
+					unsigned int asic_type, bool vf)
+{
+	return NULL;
+}
+
+static inline
+bool kgd2kfd_device_init(struct kfd_dev *kfd, struct drm_device *ddev,
+				const struct kgd2kfd_shared_resources *gpu_resources)
+{
+	return false;
+}
+
+static inline void kgd2kfd_device_exit(struct kfd_dev *kfd)
+{
+}
+
+static inline void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
+{
+}
+
+static inline int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
+{
+	return 0;
+}
+
+static inline int kgd2kfd_pre_reset(struct kfd_dev *kfd)
+{
+	return 0;
+}
+
+static inline int kgd2kfd_post_reset(struct kfd_dev *kfd)
+{
+	return 0;
+}
+
+static inline
+void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
+{
+}
+
+static inline
+void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd)
+{
+}
+
+static inline
+void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask)
+{
+}
+#endif
 #endif /* AMDGPU_AMDKFD_H_INCLUDED */
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 2/6] drm/amdkfd: separate kfd_iommu_resume from kfd_resume
  2021-12-09 22:09 [PATCH 0/6] Bug:211277 fix backport for 5.10 stable James Zhu
  2021-12-09 22:09 ` [PATCH 1/6] drm/amd/amdkfd: adjust dummy functions' placement James Zhu
@ 2021-12-09 22:09 ` James Zhu
  2021-12-09 22:09 ` [PATCH 3/6] drm/amdgpu: add amdgpu_amdkfd_resume_iommu James Zhu
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 14+ messages in thread
From: James Zhu @ 2021-12-09 22:09 UTC (permalink / raw)
  To: stable
  Cc: jzhums, alexander.deucher, kolAflash, Felix Kuehling,
	Greg Kroah-Hartman

commit fefc01f042f44ede373ee66773b8238dd8fdcb55 upstream.

Separate kfd_iommu_resume from kfd_resume for fine-tuning
of amdgpu device init/resume/reset/recovery sequence.

v2: squash in fix for !CONFIG_HSA_AMD

Bug: https://bugzilla.kernel.org/show_bug.cgi?id=211277
Signed-off-by: James Zhu <James.Zhu@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: James Zhu <James.Zhu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |  6 ++++++
 drivers/gpu/drm/amd/amdkfd/kfd_device.c    | 12 ++++++++----
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index a81d9cacf9b8..8a402a3df412 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -305,6 +305,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 			 const struct kgd2kfd_shared_resources *gpu_resources);
 void kgd2kfd_device_exit(struct kfd_dev *kfd);
 void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm);
+int kgd2kfd_resume_iommu(struct kfd_dev *kfd);
 int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm);
 int kgd2kfd_pre_reset(struct kfd_dev *kfd);
 int kgd2kfd_post_reset(struct kfd_dev *kfd);
@@ -343,6 +344,11 @@ static inline void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
 {
 }
 
+static int __maybe_unused kgd2kfd_resume_iommu(struct kfd_dev *kfd)
+{
+	return 0;
+}
+
 static inline int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
 {
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 5751bddc9cad..1204dae85797 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -896,17 +896,21 @@ int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
 	return ret;
 }
 
-static int kfd_resume(struct kfd_dev *kfd)
+int kgd2kfd_resume_iommu(struct kfd_dev *kfd)
 {
 	int err = 0;
 
 	err = kfd_iommu_resume(kfd);
-	if (err) {
+	if (err)
 		dev_err(kfd_device,
 			"Failed to resume IOMMU for device %x:%x\n",
 			kfd->pdev->vendor, kfd->pdev->device);
-		return err;
-	}
+	return err;
+}
+
+static int kfd_resume(struct kfd_dev *kfd)
+{
+	int err = 0;
 
 	err = kfd->dqm->ops.start(kfd->dqm);
 	if (err) {
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 3/6] drm/amdgpu: add amdgpu_amdkfd_resume_iommu
  2021-12-09 22:09 [PATCH 0/6] Bug:211277 fix backport for 5.10 stable James Zhu
  2021-12-09 22:09 ` [PATCH 1/6] drm/amd/amdkfd: adjust dummy functions' placement James Zhu
  2021-12-09 22:09 ` [PATCH 2/6] drm/amdkfd: separate kfd_iommu_resume from kfd_resume James Zhu
@ 2021-12-09 22:09 ` James Zhu
  2021-12-09 22:09 ` [PATCH 4/6] drm/amdgpu: move iommu_resume before ip init/resume James Zhu
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 14+ messages in thread
From: James Zhu @ 2021-12-09 22:09 UTC (permalink / raw)
  To: stable
  Cc: jzhums, alexander.deucher, kolAflash, Felix Kuehling,
	Greg Kroah-Hartman

commit 8066008482e533e91934bee49765bf8b4a7c40db upstream.

Add amdgpu_amdkfd_resume_iommu for amdgpu.

Bug: https://bugzilla.kernel.org/show_bug.cgi?id=211277
Signed-off-by: James Zhu <James.Zhu@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: James Zhu <James.Zhu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 10 ++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |  1 +
 2 files changed, 11 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index b23b31dc570e..fb6230c62daa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -190,6 +190,16 @@ void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm)
 		kgd2kfd_suspend(adev->kfd.dev, run_pm);
 }
 
+int amdgpu_amdkfd_resume_iommu(struct amdgpu_device *adev)
+{
+	int r = 0;
+
+	if (adev->kfd.dev)
+		r = kgd2kfd_resume_iommu(adev->kfd.dev);
+
+	return r;
+}
+
 int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm)
 {
 	int r = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 8a402a3df412..32e385f287cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -121,6 +121,7 @@ int amdgpu_amdkfd_init(void);
 void amdgpu_amdkfd_fini(void);
 
 void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm);
+int amdgpu_amdkfd_resume_iommu(struct amdgpu_device *adev);
 int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm);
 void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
 			const void *ih_ring_entry);
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 4/6] drm/amdgpu: move iommu_resume before ip init/resume
  2021-12-09 22:09 [PATCH 0/6] Bug:211277 fix backport for 5.10 stable James Zhu
                   ` (2 preceding siblings ...)
  2021-12-09 22:09 ` [PATCH 3/6] drm/amdgpu: add amdgpu_amdkfd_resume_iommu James Zhu
@ 2021-12-09 22:09 ` James Zhu
  2021-12-09 22:09 ` [PATCH 5/6] drm/amdgpu: init iommu after amdkfd device init James Zhu
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 14+ messages in thread
From: James Zhu @ 2021-12-09 22:09 UTC (permalink / raw)
  To: stable
  Cc: jzhums, alexander.deucher, kolAflash, Felix Kuehling,
	Greg Kroah-Hartman

commit f02abeb0779700c308e661a412451b38962b8a0b upstream.

Separate iommu_resume from kfd_resume, and move it before
other amdgpu ip init/resume.

Bug: https://bugzilla.kernel.org/show_bug.cgi?id=211277
Signed-off-by: James Zhu <James.Zhu@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: James Zhu <James.Zhu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 97723f2b5ece..2947bded074a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2220,6 +2220,10 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
 	if (r)
 		goto init_failed;
 
+	r = amdgpu_amdkfd_resume_iommu(adev);
+	if (r)
+		goto init_failed;
+
 	r = amdgpu_device_ip_hw_init_phase1(adev);
 	if (r)
 		goto init_failed;
@@ -2913,6 +2917,10 @@ static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
 {
 	int r;
 
+	r = amdgpu_amdkfd_resume_iommu(adev);
+	if (r)
+		return r;
+
 	r = amdgpu_device_ip_resume_phase1(adev);
 	if (r)
 		return r;
@@ -4296,6 +4304,10 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
 
 			if (!r) {
 				dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
+				r = amdgpu_amdkfd_resume_iommu(tmp_adev);
+				if (r)
+					goto out;
+
 				r = amdgpu_device_ip_resume_phase1(tmp_adev);
 				if (r)
 					goto out;
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 5/6] drm/amdgpu: init iommu after amdkfd device init
  2021-12-09 22:09 [PATCH 0/6] Bug:211277 fix backport for 5.10 stable James Zhu
                   ` (3 preceding siblings ...)
  2021-12-09 22:09 ` [PATCH 4/6] drm/amdgpu: move iommu_resume before ip init/resume James Zhu
@ 2021-12-09 22:09 ` James Zhu
  2021-12-09 22:09 ` [PATCH 6/6] drm/amdkfd: fix boot failure when iommu is disabled in Picasso James Zhu
  2021-12-10  6:33 ` [PATCH 0/6] Bug:211277 fix backport for 5.10 stable Greg KH
  6 siblings, 0 replies; 14+ messages in thread
From: James Zhu @ 2021-12-09 22:09 UTC (permalink / raw)
  To: stable
  Cc: jzhums, alexander.deucher, kolAflash, Yifan Zhang, Felix Kuehling,
	Sasha Levin

From: Yifan Zhang <yifan1.zhang@amd.com>

[ Upstream commit 714d9e4574d54596973ee3b0624ee4a16264d700 ]

This patch is to fix clinfo failure in Raven/Picasso:

Number of platforms: 1
  Platform Profile: FULL_PROFILE
  Platform Version: OpenCL 2.2 AMD-APP (3364.0)
  Platform Name: AMD Accelerated Parallel Processing
  Platform Vendor: Advanced Micro Devices, Inc.
  Platform Extensions: cl_khr_icd cl_amd_event_callback

  Platform Name: AMD Accelerated Parallel Processing Number of devices: 0

Signed-off-by: Yifan Zhang <yifan1.zhang@amd.com>
Reviewed-by: James Zhu <James.Zhu@amd.com>
Tested-by: James Zhu <James.Zhu@amd.com>
Acked-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
Signed-off-by: James Zhu <James.Zhu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 2947bded074a..488e574f5da1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2220,10 +2220,6 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
 	if (r)
 		goto init_failed;
 
-	r = amdgpu_amdkfd_resume_iommu(adev);
-	if (r)
-		goto init_failed;
-
 	r = amdgpu_device_ip_hw_init_phase1(adev);
 	if (r)
 		goto init_failed;
@@ -2259,6 +2255,10 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
 		amdgpu_xgmi_add_device(adev);
 	amdgpu_amdkfd_device_init(adev);
 
+	r = amdgpu_amdkfd_resume_iommu(adev);
+	if (r)
+		goto init_failed;
+
 	amdgpu_fru_get_product_info(adev);
 
 init_failed:
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 6/6] drm/amdkfd: fix boot failure when iommu is disabled in Picasso.
  2021-12-09 22:09 [PATCH 0/6] Bug:211277 fix backport for 5.10 stable James Zhu
                   ` (4 preceding siblings ...)
  2021-12-09 22:09 ` [PATCH 5/6] drm/amdgpu: init iommu after amdkfd device init James Zhu
@ 2021-12-09 22:09 ` James Zhu
  2021-12-10 13:33   ` Greg Kroah-Hartman
  2021-12-10  6:33 ` [PATCH 0/6] Bug:211277 fix backport for 5.10 stable Greg KH
  6 siblings, 1 reply; 14+ messages in thread
From: James Zhu @ 2021-12-09 22:09 UTC (permalink / raw)
  To: stable
  Cc: jzhums, alexander.deucher, kolAflash, Yifan Zhang, youling,
	Greg Kroah-Hartman

From: Yifan Zhang <yifan1.zhang@amd.com>

commit afd18180c07026f94a80ff024acef5f4159084a4 upstream.

When IOMMU disabled in sbios and kfd in iommuv2 path, iommuv2
init will fail. But this failure should not block amdgpu driver init.

Reported-by: youling <youling257@gmail.com>
Tested-by: youling <youling257@gmail.com>
Signed-off-by: Yifan Zhang <yifan1.zhang@amd.com>
Reviewed-by: James Zhu <James.Zhu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: James Zhu <James.Zhu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ----
 drivers/gpu/drm/amd/amdkfd/kfd_device.c    | 3 +++
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 488e574f5da1..f262c4e7a48a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2255,10 +2255,6 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
 		amdgpu_xgmi_add_device(adev);
 	amdgpu_amdkfd_device_init(adev);
 
-	r = amdgpu_amdkfd_resume_iommu(adev);
-	if (r)
-		goto init_failed;
-
 	amdgpu_fru_get_product_info(adev);
 
 init_failed:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 1204dae85797..b35f0af71f00 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -751,6 +751,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 
 	kfd_cwsr_init(kfd);
 
+	if (kgd2kfd_resume_iommu(kfd))
+		goto device_iommu_error;
+
 	if (kfd_resume(kfd))
 		goto kfd_resume_error;
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH 0/6] Bug:211277 fix backport for 5.10 stable
  2021-12-09 22:09 [PATCH 0/6] Bug:211277 fix backport for 5.10 stable James Zhu
                   ` (5 preceding siblings ...)
  2021-12-09 22:09 ` [PATCH 6/6] drm/amdkfd: fix boot failure when iommu is disabled in Picasso James Zhu
@ 2021-12-10  6:33 ` Greg KH
  6 siblings, 0 replies; 14+ messages in thread
From: Greg KH @ 2021-12-10  6:33 UTC (permalink / raw)
  To: James Zhu; +Cc: stable, jzhums, alexander.deucher, kolAflash

On Thu, Dec 09, 2021 at 05:09:50PM -0500, James Zhu wrote:
> These patches are back port for 5.10 stable.
> They are cherry-picked from 5.14 stable.
> 
> BugFix: https://bugzilla.kernel.org/show_bug.cgi?id=211277
> 
> James Zhu (3):
>   drm/amdkfd: separate kfd_iommu_resume from kfd_resume
>   drm/amdgpu: add amdgpu_amdkfd_resume_iommu
>   drm/amdgpu: move iommu_resume before ip init/resume
> 
> Lang Yu (1):
>   drm/amd/amdkfd: adjust dummy functions' placement
> 
> Yifan Zhang (2):
>   drm/amdgpu: init iommu after amdkfd device init
>   drm/amdkfd: fix boot failure when iommu is disabled in Picasso.

What has changed from the last time this series was submitted?

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 6/6] drm/amdkfd: fix boot failure when iommu is disabled in Picasso.
  2021-12-09 22:09 ` [PATCH 6/6] drm/amdkfd: fix boot failure when iommu is disabled in Picasso James Zhu
@ 2021-12-10 13:33   ` Greg Kroah-Hartman
  2021-12-10 14:14     ` James Zhu
  0 siblings, 1 reply; 14+ messages in thread
From: Greg Kroah-Hartman @ 2021-12-10 13:33 UTC (permalink / raw)
  To: James Zhu
  Cc: stable, jzhums, alexander.deucher, kolAflash, Yifan Zhang,
	youling

On Thu, Dec 09, 2021 at 05:09:56PM -0500, James Zhu wrote:
> From: Yifan Zhang <yifan1.zhang@amd.com>
> 
> commit afd18180c07026f94a80ff024acef5f4159084a4 upstream.
> 
> When IOMMU disabled in sbios and kfd in iommuv2 path, iommuv2
> init will fail. But this failure should not block amdgpu driver init.
> 
> Reported-by: youling <youling257@gmail.com>
> Tested-by: youling <youling257@gmail.com>
> Signed-off-by: Yifan Zhang <yifan1.zhang@amd.com>
> Reviewed-by: James Zhu <James.Zhu@amd.com>
> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
> Signed-off-by: James Zhu <James.Zhu@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ----
>  drivers/gpu/drm/amd/amdkfd/kfd_device.c    | 3 +++
>  2 files changed, 3 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 488e574f5da1..f262c4e7a48a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2255,10 +2255,6 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
>  		amdgpu_xgmi_add_device(adev);
>  	amdgpu_amdkfd_device_init(adev);
>  
> -	r = amdgpu_amdkfd_resume_iommu(adev);
> -	if (r)
> -		goto init_failed;
> -
>  	amdgpu_fru_get_product_info(adev);
>  
>  init_failed:
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> index 1204dae85797..b35f0af71f00 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> @@ -751,6 +751,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
>  
>  	kfd_cwsr_init(kfd);
>  
> +	if (kgd2kfd_resume_iommu(kfd))
> +		goto device_iommu_error;
> +
>  	if (kfd_resume(kfd))
>  		goto kfd_resume_error;
>  
> -- 
> 2.25.1
> 

Like I said last time, do not change the backport unless you HAVE to.
You did it here again for no good reason :(

greg k-h

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 6/6] drm/amdkfd: fix boot failure when iommu is disabled in Picasso.
  2021-12-10 13:33   ` Greg Kroah-Hartman
@ 2021-12-10 14:14     ` James Zhu
  2021-12-10 14:35       ` Greg Kroah-Hartman
  0 siblings, 1 reply; 14+ messages in thread
From: James Zhu @ 2021-12-10 14:14 UTC (permalink / raw)
  To: Greg Kroah-Hartman, James Zhu
  Cc: stable, jzhums, alexander.deucher, kolAflash, Yifan Zhang,
	youling


On 2021-12-10 8:33 a.m., Greg Kroah-Hartman wrote:
> On Thu, Dec 09, 2021 at 05:09:56PM -0500, James Zhu wrote:
>> From: Yifan Zhang <yifan1.zhang@amd.com>
>>
>> commit afd18180c07026f94a80ff024acef5f4159084a4 upstream.
>>
>> When IOMMU disabled in sbios and kfd in iommuv2 path, iommuv2
>> init will fail. But this failure should not block amdgpu driver init.
>>
>> Reported-by: youling <youling257@gmail.com>
>> Tested-by: youling <youling257@gmail.com>
>> Signed-off-by: Yifan Zhang <yifan1.zhang@amd.com>
>> Reviewed-by: James Zhu <James.Zhu@amd.com>
>> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
>> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
>> Signed-off-by: James Zhu <James.Zhu@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ----
>>   drivers/gpu/drm/amd/amdkfd/kfd_device.c    | 3 +++
>>   2 files changed, 3 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> index 488e574f5da1..f262c4e7a48a 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> @@ -2255,10 +2255,6 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
>>   		amdgpu_xgmi_add_device(adev);
>>   	amdgpu_amdkfd_device_init(adev);
>>   
>> -	r = amdgpu_amdkfd_resume_iommu(adev);
>> -	if (r)
>> -		goto init_failed;
>> -
>>   	amdgpu_fru_get_product_info(adev);
>>   
>>   init_failed:
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>> index 1204dae85797..b35f0af71f00 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>> @@ -751,6 +751,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
>>   
>>   	kfd_cwsr_init(kfd);
>>   
>> +	if (kgd2kfd_resume_iommu(kfd))
>> +		goto device_iommu_error;
>> +
>>   	if (kfd_resume(kfd))
>>   		goto kfd_resume_error;
>>   
>> -- 
>> 2.25.1
>>
> Like I said last time, do not change the backport unless you HAVE to.
> You did it here again for no good reason :(

[JZ] Yes, I should add more explanation next time.

The change is a backport conflict fix: it removes the call to
svm_migrate_init((struct amdgpu_device *)kfd->kgd);

The new AMD SVM feature has not been added to 5.10, so it is safe to remove it.

>
> greg k-h

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 6/6] drm/amdkfd: fix boot failure when iommu is disabled in Picasso.
  2021-12-10 14:14     ` James Zhu
@ 2021-12-10 14:35       ` Greg Kroah-Hartman
  2021-12-10 14:46         ` James Zhu
  0 siblings, 1 reply; 14+ messages in thread
From: Greg Kroah-Hartman @ 2021-12-10 14:35 UTC (permalink / raw)
  To: James Zhu
  Cc: James Zhu, stable, jzhums, alexander.deucher, kolAflash,
	Yifan Zhang, youling

On Fri, Dec 10, 2021 at 09:14:30AM -0500, James Zhu wrote:
> 
> On 2021-12-10 8:33 a.m., Greg Kroah-Hartman wrote:
> > On Thu, Dec 09, 2021 at 05:09:56PM -0500, James Zhu wrote:
> > > From: Yifan Zhang <yifan1.zhang@amd.com>
> > > 
> > > commit afd18180c07026f94a80ff024acef5f4159084a4 upstream.
> > > 
> > > When IOMMU disabled in sbios and kfd in iommuv2 path, iommuv2
> > > init will fail. But this failure should not block amdgpu driver init.
> > > 
> > > Reported-by: youling <youling257@gmail.com>
> > > Tested-by: youling <youling257@gmail.com>
> > > Signed-off-by: Yifan Zhang <yifan1.zhang@amd.com>
> > > Reviewed-by: James Zhu <James.Zhu@amd.com>
> > > Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
> > > Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
> > > Signed-off-by: James Zhu <James.Zhu@amd.com>
> > > ---
> > >   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ----
> > >   drivers/gpu/drm/amd/amdkfd/kfd_device.c    | 3 +++
> > >   2 files changed, 3 insertions(+), 4 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > index 488e574f5da1..f262c4e7a48a 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > @@ -2255,10 +2255,6 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
> > >   		amdgpu_xgmi_add_device(adev);
> > >   	amdgpu_amdkfd_device_init(adev);
> > > -	r = amdgpu_amdkfd_resume_iommu(adev);
> > > -	if (r)
> > > -		goto init_failed;
> > > -
> > >   	amdgpu_fru_get_product_info(adev);
> > >   init_failed:
> > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > > index 1204dae85797..b35f0af71f00 100644
> > > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > > @@ -751,6 +751,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
> > >   	kfd_cwsr_init(kfd);
> > > +	if (kgd2kfd_resume_iommu(kfd))
> > > +		goto device_iommu_error;
> > > +
> > >   	if (kfd_resume(kfd))
> > >   		goto kfd_resume_error;
> > > -- 
> > > 2.25.1
> > > 
> > Like I said last time, do not change the backport unless you HAVE to.
> > You did it here again for no good reason :(
> 
> [JZ] Yes, I should add more explanation next time.
> 
> Backport conflict fix to remove  svm_migrate_init((struct amdgpu_device
> *)kfd->kgd);
> 
> new AMD svm feature has not been added for 5.10 So it is safe to remove it.

No, I am talking about the fact that you fixed up a coding style fix in
this backport that is not in the original commit in Linus's tree.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 6/6] drm/amdkfd: fix boot failure when iommu is disabled in Picasso.
  2021-12-10 14:35       ` Greg Kroah-Hartman
@ 2021-12-10 14:46         ` James Zhu
  2021-12-10 15:12           ` Greg Kroah-Hartman
  0 siblings, 1 reply; 14+ messages in thread
From: James Zhu @ 2021-12-10 14:46 UTC (permalink / raw)
  To: Greg Kroah-Hartman
  Cc: James Zhu, stable, jzhums, alexander.deucher, kolAflash,
	Yifan Zhang, youling


On 2021-12-10 9:35 a.m., Greg Kroah-Hartman wrote:
> On Fri, Dec 10, 2021 at 09:14:30AM -0500, James Zhu wrote:
>> On 2021-12-10 8:33 a.m., Greg Kroah-Hartman wrote:
>>> On Thu, Dec 09, 2021 at 05:09:56PM -0500, James Zhu wrote:
>>>> From: Yifan Zhang <yifan1.zhang@amd.com>
>>>>
>>>> commit afd18180c07026f94a80ff024acef5f4159084a4 upstream.
>>>>
>>>> When IOMMU disabled in sbios and kfd in iommuv2 path, iommuv2
>>>> init will fail. But this failure should not block amdgpu driver init.
>>>>
>>>> Reported-by: youling <youling257@gmail.com>
>>>> Tested-by: youling <youling257@gmail.com>
>>>> Signed-off-by: Yifan Zhang <yifan1.zhang@amd.com>
>>>> Reviewed-by: James Zhu <James.Zhu@amd.com>
>>>> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
>>>> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
>>>> Signed-off-by: James Zhu <James.Zhu@amd.com>
>>>> ---
>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ----
>>>>    drivers/gpu/drm/amd/amdkfd/kfd_device.c    | 3 +++
>>>>    2 files changed, 3 insertions(+), 4 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> index 488e574f5da1..f262c4e7a48a 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> @@ -2255,10 +2255,6 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
>>>>    		amdgpu_xgmi_add_device(adev);
>>>>    	amdgpu_amdkfd_device_init(adev);
>>>> -	r = amdgpu_amdkfd_resume_iommu(adev);
>>>> -	if (r)
>>>> -		goto init_failed;
>>>> -
>>>>    	amdgpu_fru_get_product_info(adev);
>>>>    init_failed:
>>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>>>> index 1204dae85797..b35f0af71f00 100644
>>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>>>> @@ -751,6 +751,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
>>>>    	kfd_cwsr_init(kfd);
>>>> +	if (kgd2kfd_resume_iommu(kfd))
>>>> +		goto device_iommu_error;
>>>> +
>>>>    	if (kfd_resume(kfd))
>>>>    		goto kfd_resume_error;
>>>> -- 
>>>> 2.25.1
>>>>
>>> Like I said last time, do not change the backport unless you HAVE to.
>>> You did it here again for no good reason :(
>> [JZ] Yes, I should add more explanation next time.
>>
>> Backport conflict fix to remove  svm_migrate_init((struct amdgpu_device
>> *)kfd->kgd);
>>
>> new AMD svm feature has not been added for 5.10 So it is safe to remove it.
> No, I am talking about the fact that you fixed up a coding style fix in
> this backport that is not in the original commit in Linus's tree.

[JZ] I see. This fix is not necessary. Do you want me to send a v2 with

this unnecessary coding style fix dropped from the backport?


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 6/6] drm/amdkfd: fix boot failure when iommu is disabled in Picasso.
  2021-12-10 14:46         ` James Zhu
@ 2021-12-10 15:12           ` Greg Kroah-Hartman
  2021-12-10 15:33             ` James Zhu
  0 siblings, 1 reply; 14+ messages in thread
From: Greg Kroah-Hartman @ 2021-12-10 15:12 UTC (permalink / raw)
  To: James Zhu
  Cc: James Zhu, stable, jzhums, alexander.deucher, kolAflash,
	Yifan Zhang, youling

On Fri, Dec 10, 2021 at 09:46:08AM -0500, James Zhu wrote:
> 
> On 2021-12-10 9:35 a.m., Greg Kroah-Hartman wrote:
> > On Fri, Dec 10, 2021 at 09:14:30AM -0500, James Zhu wrote:
> > > On 2021-12-10 8:33 a.m., Greg Kroah-Hartman wrote:
> > > > On Thu, Dec 09, 2021 at 05:09:56PM -0500, James Zhu wrote:
> > > > > From: Yifan Zhang <yifan1.zhang@amd.com>
> > > > > 
> > > > > commit afd18180c07026f94a80ff024acef5f4159084a4 upstream.
> > > > > 
> > > > > When IOMMU disabled in sbios and kfd in iommuv2 path, iommuv2
> > > > > init will fail. But this failure should not block amdgpu driver init.
> > > > > 
> > > > > Reported-by: youling <youling257@gmail.com>
> > > > > Tested-by: youling <youling257@gmail.com>
> > > > > Signed-off-by: Yifan Zhang <yifan1.zhang@amd.com>
> > > > > Reviewed-by: James Zhu <James.Zhu@amd.com>
> > > > > Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
> > > > > Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
> > > > > Signed-off-by: James Zhu <James.Zhu@amd.com>
> > > > > ---
> > > > >    drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ----
> > > > >    drivers/gpu/drm/amd/amdkfd/kfd_device.c    | 3 +++
> > > > >    2 files changed, 3 insertions(+), 4 deletions(-)
> > > > > 
> > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > > > index 488e574f5da1..f262c4e7a48a 100644
> > > > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > > > @@ -2255,10 +2255,6 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
> > > > >    		amdgpu_xgmi_add_device(adev);
> > > > >    	amdgpu_amdkfd_device_init(adev);
> > > > > -	r = amdgpu_amdkfd_resume_iommu(adev);
> > > > > -	if (r)
> > > > > -		goto init_failed;
> > > > > -
> > > > >    	amdgpu_fru_get_product_info(adev);
> > > > >    init_failed:
> > > > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > > > > index 1204dae85797..b35f0af71f00 100644
> > > > > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > > > > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > > > > @@ -751,6 +751,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
> > > > >    	kfd_cwsr_init(kfd);
> > > > > +	if (kgd2kfd_resume_iommu(kfd))
> > > > > +		goto device_iommu_error;
> > > > > +
> > > > >    	if (kfd_resume(kfd))
> > > > >    		goto kfd_resume_error;
> > > > > -- 
> > > > > 2.25.1
> > > > > 
> > > > Like I said last time, do not change the backport unless you HAVE to.
> > > > You did it here again for no good reason :(
> > > [JZ] Yes, I should add more explanation next time.
> > > 
> > > Backport conflict fix to remove  svm_migrate_init((struct amdgpu_device
> > > *)kfd->kgd);
> > > 
> > > new AMD svm feature has not been added for 5.10 So it is safe to remove it.
> > No, I am talking about the fact that you fixed up a coding style fix in
> > this backport that is not in the original commit in Linus's tree.
> 
> [JZ] I see. this fix is not necessary. Do you want me to send v2 with
> 
> this unnecessary coding style fix dropping for backport?
> 

I took what was in Linus's tree already.  Please verify that what I
applied to the queue still works.

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 6/6] drm/amdkfd: fix boot failure when iommu is disabled in Picasso.
  2021-12-10 15:12           ` Greg Kroah-Hartman
@ 2021-12-10 15:33             ` James Zhu
  0 siblings, 0 replies; 14+ messages in thread
From: James Zhu @ 2021-12-10 15:33 UTC (permalink / raw)
  To: Greg Kroah-Hartman
  Cc: James Zhu, stable, jzhums, alexander.deucher, kolAflash,
	Yifan Zhang, youling


On 2021-12-10 10:12 a.m., Greg Kroah-Hartman wrote:
> On Fri, Dec 10, 2021 at 09:46:08AM -0500, James Zhu wrote:
>> On 2021-12-10 9:35 a.m., Greg Kroah-Hartman wrote:
>>> On Fri, Dec 10, 2021 at 09:14:30AM -0500, James Zhu wrote:
>>>> On 2021-12-10 8:33 a.m., Greg Kroah-Hartman wrote:
>>>>> On Thu, Dec 09, 2021 at 05:09:56PM -0500, James Zhu wrote:
>>>>>> From: Yifan Zhang <yifan1.zhang@amd.com>
>>>>>>
>>>>>> commit afd18180c07026f94a80ff024acef5f4159084a4 upstream.
>>>>>>
>>>>>> When IOMMU disabled in sbios and kfd in iommuv2 path, iommuv2
>>>>>> init will fail. But this failure should not block amdgpu driver init.
>>>>>>
>>>>>> Reported-by: youling <youling257@gmail.com>
>>>>>> Tested-by: youling <youling257@gmail.com>
>>>>>> Signed-off-by: Yifan Zhang <yifan1.zhang@amd.com>
>>>>>> Reviewed-by: James Zhu <James.Zhu@amd.com>
>>>>>> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
>>>>>> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
>>>>>> Signed-off-by: James Zhu <James.Zhu@amd.com>
>>>>>> ---
>>>>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ----
>>>>>>     drivers/gpu/drm/amd/amdkfd/kfd_device.c    | 3 +++
>>>>>>     2 files changed, 3 insertions(+), 4 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>>> index 488e574f5da1..f262c4e7a48a 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>>> @@ -2255,10 +2255,6 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
>>>>>>     		amdgpu_xgmi_add_device(adev);
>>>>>>     	amdgpu_amdkfd_device_init(adev);
>>>>>> -	r = amdgpu_amdkfd_resume_iommu(adev);
>>>>>> -	if (r)
>>>>>> -		goto init_failed;
>>>>>> -
>>>>>>     	amdgpu_fru_get_product_info(adev);
>>>>>>     init_failed:
>>>>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>>>>>> index 1204dae85797..b35f0af71f00 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>>>>>> @@ -751,6 +751,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
>>>>>>     	kfd_cwsr_init(kfd);
>>>>>> +	if (kgd2kfd_resume_iommu(kfd))
>>>>>> +		goto device_iommu_error;
>>>>>> +
>>>>>>     	if (kfd_resume(kfd))
>>>>>>     		goto kfd_resume_error;
>>>>>> -- 
>>>>>> 2.25.1
>>>>>>
>>>>> Like I said last time, do not change the backport unless you HAVE to.
>>>>> You did it here again for no good reason :(
>>>> [JZ] Yes, I should add more explanation next time.
>>>>
>>>> Backport conflict fix to remove  svm_migrate_init((struct amdgpu_device
>>>> *)kfd->kgd);
>>>>
>>>> new AMD svm feature has not been added for 5.10 So it is safe to remove it.
>>> No, I am talking about the fact that you fixed up a coding style fix in
>>> this backport that is not in the original commit in Linus's tree.
>> [JZ] I see. this fix is not necessary. Do you want me to send v2 with
>>
>> this unnecessary coding style fix dropping for backport?
>>
> I took what was in Linus's tree already.  Please verify that what I
> applied to the queue still works.
[JZ] I verified it. It still works fine. Thanks for the correction!
>
> thanks,
>
> greg k-h

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2021-12-10 15:33 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-12-09 22:09 [PATCH 0/6] Bug:211277 fix backport for 5.10 stable James Zhu
2021-12-09 22:09 ` [PATCH 1/6] drm/amd/amdkfd: adjust dummy functions' placement James Zhu
2021-12-09 22:09 ` [PATCH 2/6] drm/amdkfd: separate kfd_iommu_resume from kfd_resume James Zhu
2021-12-09 22:09 ` [PATCH 3/6] drm/amdgpu: add amdgpu_amdkfd_resume_iommu James Zhu
2021-12-09 22:09 ` [PATCH 4/6] drm/amdgpu: move iommu_resume before ip init/resume James Zhu
2021-12-09 22:09 ` [PATCH 5/6] drm/amdgpu: init iommu after amdkfd device init James Zhu
2021-12-09 22:09 ` [PATCH 6/6] drm/amdkfd: fix boot failure when iommu is disabled in Picasso James Zhu
2021-12-10 13:33   ` Greg Kroah-Hartman
2021-12-10 14:14     ` James Zhu
2021-12-10 14:35       ` Greg Kroah-Hartman
2021-12-10 14:46         ` James Zhu
2021-12-10 15:12           ` Greg Kroah-Hartman
2021-12-10 15:33             ` James Zhu
2021-12-10  6:33 ` [PATCH 0/6] Bug:211277 fix backport for 5.10 stable Greg KH

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox