* [PATCH 1/1] drm/amdkfd: Cleanup vm process info if init vm failed
@ 2022-12-20 20:36 Philip Yang
2022-12-20 23:28 ` Felix Kuehling
0 siblings, 1 reply; 2+ messages in thread
From: Philip Yang @ 2022-12-20 20:36 UTC (permalink / raw)
To: amd-gfx; +Cc: Philip Yang, felix.kuehling
If acquire_vm fails while initializing the KFD VM, set vm->process_info
to NULL and free the process info; otherwise, every future acquire_vm
call will fail because vm->process_info is not NULL.
Pass avm as a parameter to remove the duplicated code.
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 4 ++--
.../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 18 ++----------------
drivers/gpu/drm/amd/amdkfd/kfd_process.c | 12 ++++++++++--
3 files changed, 14 insertions(+), 20 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 0040deaf8a83..fb41869e357a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -271,9 +271,9 @@ int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_
((struct drm_file *)(drm_priv))->driver_priv)->vm)
int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev,
- struct file *filp, u32 pasid);
+ struct amdgpu_vm *avm, u32 pasid);
int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
- struct file *filp,
+ struct amdgpu_vm *avm,
void **process_info,
struct dma_fence **ef);
void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index b15091d8310d..2a118669d0e3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1430,18 +1430,11 @@ static void amdgpu_amdkfd_gpuvm_unpin_bo(struct amdgpu_bo *bo)
}
int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev,
- struct file *filp, u32 pasid)
+ struct amdgpu_vm *avm, u32 pasid)
{
- struct amdgpu_fpriv *drv_priv;
- struct amdgpu_vm *avm;
int ret;
- ret = amdgpu_file_to_fpriv(filp, &drv_priv);
- if (ret)
- return ret;
- avm = &drv_priv->vm;
-
/* Free the original amdgpu allocated pasid,
* will be replaced with kfd allocated pasid.
*/
@@ -1458,19 +1451,12 @@ int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev,
}
int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
- struct file *filp,
+ struct amdgpu_vm *avm,
void **process_info,
struct dma_fence **ef)
{
- struct amdgpu_fpriv *drv_priv;
- struct amdgpu_vm *avm;
int ret;
- ret = amdgpu_file_to_fpriv(filp, &drv_priv);
- if (ret)
- return ret;
- avm = &drv_priv->vm;
-
/* Already a compute VM? */
if (avm->process_info)
return -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 51b1683ac5c1..71db24fefe05 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1563,6 +1563,8 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
int kfd_process_device_init_vm(struct kfd_process_device *pdd,
struct file *drm_file)
{
+ struct amdgpu_fpriv *drv_priv;
+ struct amdgpu_vm *avm;
struct kfd_process *p;
struct kfd_dev *dev;
int ret;
@@ -1573,10 +1575,15 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
if (pdd->drm_priv)
return -EBUSY;
+ ret = amdgpu_file_to_fpriv(drm_file, &drv_priv);
+ if (ret)
+ return ret;
+ avm = &drv_priv->vm;
+
p = pdd->process;
dev = pdd->dev;
- ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(dev->adev, drm_file,
+ ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(dev->adev, avm,
&p->kgd_process_info,
&p->ef);
if (ret) {
@@ -1593,7 +1600,7 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
if (ret)
goto err_init_cwsr;
- ret = amdgpu_amdkfd_gpuvm_set_vm_pasid(dev->adev, drm_file, p->pasid);
+ ret = amdgpu_amdkfd_gpuvm_set_vm_pasid(dev->adev, avm, p->pasid);
if (ret)
goto err_set_pasid;
@@ -1607,6 +1614,7 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
kfd_process_device_destroy_ib_mem(pdd);
err_reserve_ib_mem:
pdd->drm_priv = NULL;
+ amdgpu_amdkfd_gpuvm_destroy_cb(dev->adev, avm);
return ret;
}
--
2.35.1
^ permalink raw reply related [flat|nested] 2+ messages in thread* Re: [PATCH 1/1] drm/amdkfd: Cleanup vm process info if init vm failed
2022-12-20 20:36 [PATCH 1/1] drm/amdkfd: Cleanup vm process info if init vm failed Philip Yang
@ 2022-12-20 23:28 ` Felix Kuehling
0 siblings, 0 replies; 2+ messages in thread
From: Felix Kuehling @ 2022-12-20 23:28 UTC (permalink / raw)
To: Philip Yang, amd-gfx
On 2022-12-20 15:36, Philip Yang wrote:
> If acquire_vm failed when initializing KFD vm, set vm->process_info to
> NULL and free process info, otherwise, the future acquire_vm will
> always fail as vm->process_info is not NULL.
>
> Pass avm as parameter to remove the duplicate code.
>
> Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
I'm still curious what caused the acquire_vm failure in the first place.
Regards,
Felix
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 4 ++--
> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 18 ++----------------
> drivers/gpu/drm/amd/amdkfd/kfd_process.c | 12 ++++++++++--
> 3 files changed, 14 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index 0040deaf8a83..fb41869e357a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -271,9 +271,9 @@ int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_
> ((struct drm_file *)(drm_priv))->driver_priv)->vm)
>
> int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev,
> - struct file *filp, u32 pasid);
> + struct amdgpu_vm *avm, u32 pasid);
> int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
> - struct file *filp,
> + struct amdgpu_vm *avm,
> void **process_info,
> struct dma_fence **ef);
> void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index b15091d8310d..2a118669d0e3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -1430,18 +1430,11 @@ static void amdgpu_amdkfd_gpuvm_unpin_bo(struct amdgpu_bo *bo)
> }
>
> int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev,
> - struct file *filp, u32 pasid)
> + struct amdgpu_vm *avm, u32 pasid)
>
> {
> - struct amdgpu_fpriv *drv_priv;
> - struct amdgpu_vm *avm;
> int ret;
>
> - ret = amdgpu_file_to_fpriv(filp, &drv_priv);
> - if (ret)
> - return ret;
> - avm = &drv_priv->vm;
> -
> /* Free the original amdgpu allocated pasid,
> * will be replaced with kfd allocated pasid.
> */
> @@ -1458,19 +1451,12 @@ int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev,
> }
>
> int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
> - struct file *filp,
> + struct amdgpu_vm *avm,
> void **process_info,
> struct dma_fence **ef)
> {
> - struct amdgpu_fpriv *drv_priv;
> - struct amdgpu_vm *avm;
> int ret;
>
> - ret = amdgpu_file_to_fpriv(filp, &drv_priv);
> - if (ret)
> - return ret;
> - avm = &drv_priv->vm;
> -
> /* Already a compute VM? */
> if (avm->process_info)
> return -EINVAL;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index 51b1683ac5c1..71db24fefe05 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -1563,6 +1563,8 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
> int kfd_process_device_init_vm(struct kfd_process_device *pdd,
> struct file *drm_file)
> {
> + struct amdgpu_fpriv *drv_priv;
> + struct amdgpu_vm *avm;
> struct kfd_process *p;
> struct kfd_dev *dev;
> int ret;
> @@ -1573,10 +1575,15 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
> if (pdd->drm_priv)
> return -EBUSY;
>
> + ret = amdgpu_file_to_fpriv(drm_file, &drv_priv);
> + if (ret)
> + return ret;
> + avm = &drv_priv->vm;
> +
> p = pdd->process;
> dev = pdd->dev;
>
> - ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(dev->adev, drm_file,
> + ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(dev->adev, avm,
> &p->kgd_process_info,
> &p->ef);
> if (ret) {
> @@ -1593,7 +1600,7 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
> if (ret)
> goto err_init_cwsr;
>
> - ret = amdgpu_amdkfd_gpuvm_set_vm_pasid(dev->adev, drm_file, p->pasid);
> + ret = amdgpu_amdkfd_gpuvm_set_vm_pasid(dev->adev, avm, p->pasid);
> if (ret)
> goto err_set_pasid;
>
> @@ -1607,6 +1614,7 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
> kfd_process_device_destroy_ib_mem(pdd);
> err_reserve_ib_mem:
> pdd->drm_priv = NULL;
> + amdgpu_amdkfd_gpuvm_destroy_cb(dev->adev, avm);
>
> return ret;
> }
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2022-12-20 23:28 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2022-12-20 20:36 [PATCH 1/1] drm/amdkfd: Cleanup vm process info if init vm failed Philip Yang
2022-12-20 23:28 ` Felix Kuehling
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox