AMD-GFX Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/amdgpu: add a flag to indicate if a VM is attached to fpriv
@ 2023-05-24  9:23 Guchun Chen
  2023-05-26  2:15 ` Chen, Guchun
  2023-05-26  7:36 ` Christian König
  0 siblings, 2 replies; 3+ messages in thread
From: Guchun Chen @ 2023-05-24  9:23 UTC (permalink / raw)
  To: amd-gfx, alexander.deucher, hawking.zhang, lijo.lazar,
	Philip.Yang, Felix.Kuehling
  Cc: Guchun Chen

Recent code stores xcp_id to amdgpu bo for accounting memory
usage or find correct KFD node, and this xcp_id is from file
private data after opening device. However, not all VMs are
attached to this fpriv structure like the case in amdgpu_mes_self_test.
So add a flag to differentiate the cases. Otherwise, KASAN will
complain out of bound access.

[   77.292314] BUG: KASAN: slab-out-of-bounds in amdgpu_vm_pt_create+0x17e/0x4b0 [amdgpu]
[   77.293845] Read of size 4 at addr ffff888102c48a48 by task modprobe/1069
[   77.294146] Call Trace:
[   77.294178]  <TASK>
[   77.294208]  dump_stack_lvl+0x49/0x63
[   77.294260]  print_report+0x16f/0x4a6
[   77.294307]  ? amdgpu_vm_pt_create+0x17e/0x4b0 [amdgpu]
[   77.295979]  ? kasan_complete_mode_report_info+0x3c/0x200
[   77.296057]  ? amdgpu_vm_pt_create+0x17e/0x4b0 [amdgpu]
[   77.297556]  kasan_report+0xb4/0x130
[   77.297609]  ? amdgpu_vm_pt_create+0x17e/0x4b0 [amdgpu]
[   77.299202]  __asan_load4+0x6f/0x90
[   77.299272]  amdgpu_vm_pt_create+0x17e/0x4b0 [amdgpu]
[   77.300796]  ? amdgpu_init+0x6e/0x1000 [amdgpu]
[   77.302222]  ? amdgpu_vm_pt_clear+0x750/0x750 [amdgpu]
[   77.303721]  ? preempt_count_sub+0x18/0xc0
[   77.303786]  amdgpu_vm_init+0x39e/0x870 [amdgpu]
[   77.305186]  ? amdgpu_vm_wait_idle+0x90/0x90 [amdgpu]
[   77.306683]  ? kasan_set_track+0x25/0x30
[   77.306737]  ? kasan_save_alloc_info+0x1b/0x30
[   77.306795]  ? __kasan_kmalloc+0x87/0xa0
[   77.306852]  amdgpu_mes_self_test+0x169/0x620 [amdgpu]

Fixes: ffc6deb773f7("drm/amdkfd: Store xcp partition id to amdgpu bo")
Signed-off-by: Guchun Chen <guchun.chen@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c   |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c   |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c    |  5 ++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h    |  5 ++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 12 +++++++++---
 5 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 41d047e5de69..79b80f9233db 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -1229,7 +1229,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 		pasid = 0;
 	}
 
-	r = amdgpu_vm_init(adev, &fpriv->vm);
+	r = amdgpu_vm_init(adev, &fpriv->vm, true);
 	if (r)
 		goto error_pasid;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index 49bb6c03d606..3be5219edf88 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -1345,7 +1345,7 @@ int amdgpu_mes_self_test(struct amdgpu_device *adev)
 		goto error_pasid;
 	}
 
-	r = amdgpu_vm_init(adev, vm);
+	r = amdgpu_vm_init(adev, vm, false);
 	if (r) {
 		DRM_ERROR("failed to initialize vm\n");
 		goto error_pasid;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 37b9d8a8dbec..47ffaa1526a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2099,13 +2099,15 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
  *
  * @adev: amdgpu_device pointer
  * @vm: requested vm
+ * @vm_attach_to_fpriv: flag to tell if vm is attached to file private data
  *
  * Init @vm fields.
  *
  * Returns:
  * 0 for success, error for failure.
  */
-int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+		   bool vm_attach_to_fpriv)
 {
 	struct amdgpu_bo *root_bo;
 	struct amdgpu_bo_vm *root;
@@ -2131,6 +2133,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 
 	vm->pte_support_ats = false;
 	vm->is_compute_context = false;
+	vm->vm_attach_to_fpriv = vm_attach_to_fpriv;
 
 	vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
 				    AMDGPU_VM_USE_CPU_FOR_GFX);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index d551fca1780e..62ed14b1fc16 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -333,6 +333,9 @@ struct amdgpu_vm {
 	/* Flag to indicate if VM is used for compute */
 	bool			is_compute_context;
 
+	/* Flag to tell if VM is attached to file private data */
+	bool vm_attach_to_fpriv;
+
 	/* Memory partition number, -1 means any partition */
 	int8_t			mem_id;
 };
@@ -392,7 +395,7 @@ int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 			u32 pasid);
 
 long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout);
-int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, bool vm_attach_to_fpriv);
 int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
 void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
 void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index cc3b1b596e56..16b3350aa896 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -502,7 +502,7 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 			int level, bool immediate, struct amdgpu_bo_vm **vmbo)
 {
-	struct amdgpu_fpriv *fpriv = container_of(vm, struct amdgpu_fpriv, vm);
+	struct amdgpu_fpriv *fpriv;
 	struct amdgpu_bo_param bp;
 	struct amdgpu_bo *bo;
 	struct dma_resv *resv;
@@ -535,7 +535,10 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 
 	bp.type = ttm_bo_type_kernel;
 	bp.no_wait_gpu = immediate;
-	bp.xcp_id_plus1 = fpriv->xcp_id == ~0 ? 0 : fpriv->xcp_id + 1;
+	if (vm->vm_attach_to_fpriv) {
+		fpriv = container_of(vm, struct amdgpu_fpriv, vm);
+		bp.xcp_id_plus1 = fpriv->xcp_id == ~0 ? 0 : fpriv->xcp_id + 1;
+	}
 
 	if (vm->root.bo)
 		bp.resv = vm->root.bo->tbo.base.resv;
@@ -561,7 +564,10 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	bp.type = ttm_bo_type_kernel;
 	bp.resv = bo->tbo.base.resv;
 	bp.bo_ptr_size = sizeof(struct amdgpu_bo);
-	bp.xcp_id_plus1 = fpriv->xcp_id == ~0 ? 0 : fpriv->xcp_id + 1;
+	if (vm->vm_attach_to_fpriv) {
+		fpriv = container_of(vm, struct amdgpu_fpriv, vm);
+		bp.xcp_id_plus1 = fpriv->xcp_id == ~0 ? 0 : fpriv->xcp_id + 1;
+	}
 
 	r = amdgpu_bo_create(adev, &bp, &(*vmbo)->shadow);
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* RE: [PATCH] drm/amdgpu: add a flag to indicate if a VM is attached to fpriv
  2023-05-24  9:23 [PATCH] drm/amdgpu: add a flag to indicate if a VM is attached to fpriv Guchun Chen
@ 2023-05-26  2:15 ` Chen, Guchun
  2023-05-26  7:36 ` Christian König
  1 sibling, 0 replies; 3+ messages in thread
From: Chen, Guchun @ 2023-05-26  2:15 UTC (permalink / raw)
  To: amd-gfx@lists.freedesktop.org, Deucher,  Alexander,
	Zhang, Hawking, Lazar, Lijo, Yang, Philip, Kuehling, Felix,
	Koenig, Christian

[Public]

Ping..

> -----Original Message-----
> From: Chen, Guchun <Guchun.Chen@amd.com>
> Sent: Wednesday, May 24, 2023 5:23 PM
> To: amd-gfx@lists.freedesktop.org; Deucher, Alexander
> <Alexander.Deucher@amd.com>; Zhang, Hawking
> <Hawking.Zhang@amd.com>; Lazar, Lijo <Lijo.Lazar@amd.com>; Yang, Philip
> <Philip.Yang@amd.com>; Kuehling, Felix <Felix.Kuehling@amd.com>
> Cc: Chen, Guchun <Guchun.Chen@amd.com>
> Subject: [PATCH] drm/amdgpu: add a flag to indicate if a VM is attached to
> fpriv
>
> Recent code stores xcp_id to amdgpu bo for accounting memory usage or
> find correct KFD node, and this xcp_id is from file private data after opening
> device. However, not all VMs are attached to this fpriv structure like the case
> in amdgpu_mes_self_test.
> So add a flag to differentiate the cases. Otherwise, KASAN will complain out
> of bound access.
>
> [   77.292314] BUG: KASAN: slab-out-of-bounds in
> amdgpu_vm_pt_create+0x17e/0x4b0 [amdgpu]
> [   77.293845] Read of size 4 at addr ffff888102c48a48 by task
> modprobe/1069
> [   77.294146] Call Trace:
> [   77.294178]  <TASK>
> [   77.294208]  dump_stack_lvl+0x49/0x63
> [   77.294260]  print_report+0x16f/0x4a6
> [   77.294307]  ? amdgpu_vm_pt_create+0x17e/0x4b0 [amdgpu]
> [   77.295979]  ? kasan_complete_mode_report_info+0x3c/0x200
> [   77.296057]  ? amdgpu_vm_pt_create+0x17e/0x4b0 [amdgpu]
> [   77.297556]  kasan_report+0xb4/0x130
> [   77.297609]  ? amdgpu_vm_pt_create+0x17e/0x4b0 [amdgpu]
> [   77.299202]  __asan_load4+0x6f/0x90
> [   77.299272]  amdgpu_vm_pt_create+0x17e/0x4b0 [amdgpu]
> [   77.300796]  ? amdgpu_init+0x6e/0x1000 [amdgpu]
> [   77.302222]  ? amdgpu_vm_pt_clear+0x750/0x750 [amdgpu]
> [   77.303721]  ? preempt_count_sub+0x18/0xc0
> [   77.303786]  amdgpu_vm_init+0x39e/0x870 [amdgpu]
> [   77.305186]  ? amdgpu_vm_wait_idle+0x90/0x90 [amdgpu]
> [   77.306683]  ? kasan_set_track+0x25/0x30
> [   77.306737]  ? kasan_save_alloc_info+0x1b/0x30
> [   77.306795]  ? __kasan_kmalloc+0x87/0xa0
> [   77.306852]  amdgpu_mes_self_test+0x169/0x620 [amdgpu]
>
> Fixes: ffc6deb773f7("drm/amdkfd: Store xcp partition id to amdgpu bo")
> Signed-off-by: Guchun Chen <guchun.chen@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c   |  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c   |  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c    |  5 ++++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h    |  5 ++++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 12 +++++++++---
>  5 files changed, 19 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> index 41d047e5de69..79b80f9233db 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> @@ -1229,7 +1229,7 @@ int amdgpu_driver_open_kms(struct drm_device
> *dev, struct drm_file *file_priv)
>               pasid = 0;
>       }
>
> -     r = amdgpu_vm_init(adev, &fpriv->vm);
> +     r = amdgpu_vm_init(adev, &fpriv->vm, true);
>       if (r)
>               goto error_pasid;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> index 49bb6c03d606..3be5219edf88 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> @@ -1345,7 +1345,7 @@ int amdgpu_mes_self_test(struct amdgpu_device
> *adev)
>               goto error_pasid;
>       }
>
> -     r = amdgpu_vm_init(adev, vm);
> +     r = amdgpu_vm_init(adev, vm, false);
>       if (r) {
>               DRM_ERROR("failed to initialize vm\n");
>               goto error_pasid;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 37b9d8a8dbec..47ffaa1526a0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -2099,13 +2099,15 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm
> *vm, long timeout)
>   *
>   * @adev: amdgpu_device pointer
>   * @vm: requested vm
> + * @vm_attach_to_fpriv: flag to tell if vm is attached to file private
> + data
>   *
>   * Init @vm fields.
>   *
>   * Returns:
>   * 0 for success, error for failure.
>   */
> -int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
> +int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> +                bool vm_attach_to_fpriv)
>  {
>       struct amdgpu_bo *root_bo;
>       struct amdgpu_bo_vm *root;
> @@ -2131,6 +2133,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev,
> struct amdgpu_vm *vm)
>
>       vm->pte_support_ats = false;
>       vm->is_compute_context = false;
> +     vm->vm_attach_to_fpriv = vm_attach_to_fpriv;
>
>       vm->use_cpu_for_update = !!(adev-
> >vm_manager.vm_update_mode &
>                                   AMDGPU_VM_USE_CPU_FOR_GFX);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index d551fca1780e..62ed14b1fc16 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -333,6 +333,9 @@ struct amdgpu_vm {
>       /* Flag to indicate if VM is used for compute */
>       bool                    is_compute_context;
>
> +     /* Flag to tell if VM is attached to file private data */
> +     bool vm_attach_to_fpriv;
> +
>       /* Memory partition number, -1 means any partition */
>       int8_t                  mem_id;
>  };
> @@ -392,7 +395,7 @@ int amdgpu_vm_set_pasid(struct amdgpu_device
> *adev, struct amdgpu_vm *vm,
>                       u32 pasid);
>
>  long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout); -int
> amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm);
> +int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> +bool vm_attach_to_fpriv);
>  int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct
> amdgpu_vm *vm);  void amdgpu_vm_release_compute(struct
> amdgpu_device *adev, struct amdgpu_vm *vm);  void
> amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm); diff
> --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> index cc3b1b596e56..16b3350aa896 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> @@ -502,7 +502,7 @@ int amdgpu_vm_pt_clear(struct amdgpu_device
> *adev, struct amdgpu_vm *vm,  int amdgpu_vm_pt_create(struct
> amdgpu_device *adev, struct amdgpu_vm *vm,
>                       int level, bool immediate, struct amdgpu_bo_vm
> **vmbo)  {
> -     struct amdgpu_fpriv *fpriv = container_of(vm, struct amdgpu_fpriv,
> vm);
> +     struct amdgpu_fpriv *fpriv;
>       struct amdgpu_bo_param bp;
>       struct amdgpu_bo *bo;
>       struct dma_resv *resv;
> @@ -535,7 +535,10 @@ int amdgpu_vm_pt_create(struct amdgpu_device
> *adev, struct amdgpu_vm *vm,
>
>       bp.type = ttm_bo_type_kernel;
>       bp.no_wait_gpu = immediate;
> -     bp.xcp_id_plus1 = fpriv->xcp_id == ~0 ? 0 : fpriv->xcp_id + 1;
> +     if (vm->vm_attach_to_fpriv) {
> +             fpriv = container_of(vm, struct amdgpu_fpriv, vm);
> +             bp.xcp_id_plus1 = fpriv->xcp_id == ~0 ? 0 : fpriv->xcp_id + 1;
> +     }
>
>       if (vm->root.bo)
>               bp.resv = vm->root.bo->tbo.base.resv; @@ -561,7 +564,10
> @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct
> amdgpu_vm *vm,
>       bp.type = ttm_bo_type_kernel;
>       bp.resv = bo->tbo.base.resv;
>       bp.bo_ptr_size = sizeof(struct amdgpu_bo);
> -     bp.xcp_id_plus1 = fpriv->xcp_id == ~0 ? 0 : fpriv->xcp_id + 1;
> +     if (vm->vm_attach_to_fpriv) {
> +             fpriv = container_of(vm, struct amdgpu_fpriv, vm);
> +             bp.xcp_id_plus1 = fpriv->xcp_id == ~0 ? 0 : fpriv->xcp_id + 1;
> +     }
>
>       r = amdgpu_bo_create(adev, &bp, &(*vmbo)->shadow);
>
> --
> 2.25.1


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] drm/amdgpu: add a flag to indicate if a VM is attached to fpriv
  2023-05-24  9:23 [PATCH] drm/amdgpu: add a flag to indicate if a VM is attached to fpriv Guchun Chen
  2023-05-26  2:15 ` Chen, Guchun
@ 2023-05-26  7:36 ` Christian König
  1 sibling, 0 replies; 3+ messages in thread
From: Christian König @ 2023-05-26  7:36 UTC (permalink / raw)
  To: Guchun Chen, amd-gfx, alexander.deucher, hawking.zhang,
	lijo.lazar, Philip.Yang, Felix.Kuehling

Am 24.05.23 um 11:23 schrieb Guchun Chen:
> Recent code stores xcp_id to amdgpu bo for accounting memory
> usage or find correct KFD node, and this xcp_id is from file
> private data after opening device. However, not all VMs are
> attached to this fpriv structure like the case in amdgpu_mes_self_test.
> So add a flag to differentiate the cases. Otherwise, KASAN will
> complain out of bound access.

The VM code shouldn't touch the fpriv in any way possible in the first 
place.

Instead we should provide the xcp_id during amdgpu_vm_init() and use it 
to allocate the root PD. All other PDs/PTs should then just use the 
xcp_id from the root PD.

Christian.

>
> [   77.292314] BUG: KASAN: slab-out-of-bounds in amdgpu_vm_pt_create+0x17e/0x4b0 [amdgpu]
> [   77.293845] Read of size 4 at addr ffff888102c48a48 by task modprobe/1069
> [   77.294146] Call Trace:
> [   77.294178]  <TASK>
> [   77.294208]  dump_stack_lvl+0x49/0x63
> [   77.294260]  print_report+0x16f/0x4a6
> [   77.294307]  ? amdgpu_vm_pt_create+0x17e/0x4b0 [amdgpu]
> [   77.295979]  ? kasan_complete_mode_report_info+0x3c/0x200
> [   77.296057]  ? amdgpu_vm_pt_create+0x17e/0x4b0 [amdgpu]
> [   77.297556]  kasan_report+0xb4/0x130
> [   77.297609]  ? amdgpu_vm_pt_create+0x17e/0x4b0 [amdgpu]
> [   77.299202]  __asan_load4+0x6f/0x90
> [   77.299272]  amdgpu_vm_pt_create+0x17e/0x4b0 [amdgpu]
> [   77.300796]  ? amdgpu_init+0x6e/0x1000 [amdgpu]
> [   77.302222]  ? amdgpu_vm_pt_clear+0x750/0x750 [amdgpu]
> [   77.303721]  ? preempt_count_sub+0x18/0xc0
> [   77.303786]  amdgpu_vm_init+0x39e/0x870 [amdgpu]
> [   77.305186]  ? amdgpu_vm_wait_idle+0x90/0x90 [amdgpu]
> [   77.306683]  ? kasan_set_track+0x25/0x30
> [   77.306737]  ? kasan_save_alloc_info+0x1b/0x30
> [   77.306795]  ? __kasan_kmalloc+0x87/0xa0
> [   77.306852]  amdgpu_mes_self_test+0x169/0x620 [amdgpu]
>
> Fixes: ffc6deb773f7("drm/amdkfd: Store xcp partition id to amdgpu bo")
> Signed-off-by: Guchun Chen <guchun.chen@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c   |  2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c   |  2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c    |  5 ++++-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h    |  5 ++++-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 12 +++++++++---
>   5 files changed, 19 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> index 41d047e5de69..79b80f9233db 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> @@ -1229,7 +1229,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
>   		pasid = 0;
>   	}
>   
> -	r = amdgpu_vm_init(adev, &fpriv->vm);
> +	r = amdgpu_vm_init(adev, &fpriv->vm, true);
>   	if (r)
>   		goto error_pasid;
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> index 49bb6c03d606..3be5219edf88 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> @@ -1345,7 +1345,7 @@ int amdgpu_mes_self_test(struct amdgpu_device *adev)
>   		goto error_pasid;
>   	}
>   
> -	r = amdgpu_vm_init(adev, vm);
> +	r = amdgpu_vm_init(adev, vm, false);
>   	if (r) {
>   		DRM_ERROR("failed to initialize vm\n");
>   		goto error_pasid;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 37b9d8a8dbec..47ffaa1526a0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -2099,13 +2099,15 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
>    *
>    * @adev: amdgpu_device pointer
>    * @vm: requested vm
> + * @vm_attach_to_fpriv: flag to tell if vm is attached to file private data
>    *
>    * Init @vm fields.
>    *
>    * Returns:
>    * 0 for success, error for failure.
>    */
> -int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
> +int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> +		   bool vm_attach_to_fpriv)
>   {
>   	struct amdgpu_bo *root_bo;
>   	struct amdgpu_bo_vm *root;
> @@ -2131,6 +2133,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>   
>   	vm->pte_support_ats = false;
>   	vm->is_compute_context = false;
> +	vm->vm_attach_to_fpriv = vm_attach_to_fpriv;
>   
>   	vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
>   				    AMDGPU_VM_USE_CPU_FOR_GFX);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index d551fca1780e..62ed14b1fc16 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -333,6 +333,9 @@ struct amdgpu_vm {
>   	/* Flag to indicate if VM is used for compute */
>   	bool			is_compute_context;
>   
> +	/* Flag to tell if VM is attached to file private data */
> +	bool vm_attach_to_fpriv;
> +
>   	/* Memory partition number, -1 means any partition */
>   	int8_t			mem_id;
>   };
> @@ -392,7 +395,7 @@ int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   			u32 pasid);
>   
>   long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout);
> -int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm);
> +int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, bool vm_attach_to_fpriv);
>   int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
>   void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
>   void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> index cc3b1b596e56..16b3350aa896 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> @@ -502,7 +502,7 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   			int level, bool immediate, struct amdgpu_bo_vm **vmbo)
>   {
> -	struct amdgpu_fpriv *fpriv = container_of(vm, struct amdgpu_fpriv, vm);
> +	struct amdgpu_fpriv *fpriv;
>   	struct amdgpu_bo_param bp;
>   	struct amdgpu_bo *bo;
>   	struct dma_resv *resv;
> @@ -535,7 +535,10 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   
>   	bp.type = ttm_bo_type_kernel;
>   	bp.no_wait_gpu = immediate;
> -	bp.xcp_id_plus1 = fpriv->xcp_id == ~0 ? 0 : fpriv->xcp_id + 1;
> +	if (vm->vm_attach_to_fpriv) {
> +		fpriv = container_of(vm, struct amdgpu_fpriv, vm);
> +		bp.xcp_id_plus1 = fpriv->xcp_id == ~0 ? 0 : fpriv->xcp_id + 1;
> +	}
>   
>   	if (vm->root.bo)
>   		bp.resv = vm->root.bo->tbo.base.resv;
> @@ -561,7 +564,10 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   	bp.type = ttm_bo_type_kernel;
>   	bp.resv = bo->tbo.base.resv;
>   	bp.bo_ptr_size = sizeof(struct amdgpu_bo);
> -	bp.xcp_id_plus1 = fpriv->xcp_id == ~0 ? 0 : fpriv->xcp_id + 1;
> +	if (vm->vm_attach_to_fpriv) {
> +		fpriv = container_of(vm, struct amdgpu_fpriv, vm);
> +		bp.xcp_id_plus1 = fpriv->xcp_id == ~0 ? 0 : fpriv->xcp_id + 1;
> +	}
>   
>   	r = amdgpu_bo_create(adev, &bp, &(*vmbo)->shadow);
>   


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2023-05-26  7:36 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-05-24  9:23 [PATCH] drm/amdgpu: add a flag to indicate if a VM is attached to fpriv Guchun Chen
2023-05-26  2:15 ` Chen, Guchun
2023-05-26  7:36 ` Christian König

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox