All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/amdkfd: disable SVM for GC 10.1.3/4
@ 2023-09-07 12:08 Lang Yu
  2023-09-07 20:03 ` Felix Kuehling
  0 siblings, 1 reply; 3+ messages in thread
From: Lang Yu @ 2023-09-07 12:08 UTC (permalink / raw)
  To: amd-gfx; +Cc: Philip Yang, Felix Kuehling, Lang Yu

GC 10.1.3/4 have problems with TLB_FLUSH_HEAVYWEIGHT,
which is used by SVM in svm_range_unmap_from_gpus().
To avoid these problems, do not set up the zone device on GC 10.1.3/4.

Signed-off-by: Lang Yu <Lang.Yu@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 7d82c7da223a..dd3db3d88d59 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -992,6 +992,22 @@ static const struct dev_pagemap_ops svm_migrate_pgmap_ops = {
 /* Each VRAM page uses sizeof(struct page) on system memory */
 #define SVM_HMM_PAGE_STRUCT_SIZE(size) ((size)/PAGE_SIZE * sizeof(struct page))
 
+static inline bool is_zone_device_needed(struct amdgpu_device *adev)
+{
+	/* Page migration works on gfx9 or newer */
+	if (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 0, 1))
+		return false;
+
+	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 3) ||
+	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 4))
+		return false;
+
+	if (adev->gmc.is_app_apu)
+		return false;
+
+	return true;
+}
+
 int kgd2kfd_init_zone_device(struct amdgpu_device *adev)
 {
 	struct amdgpu_kfd_dev *kfddev = &adev->kfd;
@@ -1000,11 +1016,7 @@ int kgd2kfd_init_zone_device(struct amdgpu_device *adev)
 	unsigned long size;
 	void *r;
 
-	/* Page migration works on gfx9 or newer */
-	if (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 0, 1))
-		return -EINVAL;
-
-	if (adev->gmc.is_app_apu)
+	if (!is_zone_device_needed(adev))
 		return 0;
 
 	pgmap = &kfddev->pgmap;
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH] drm/amdkfd: disable SVM for GC 10.1.3/4
  2023-09-07 12:08 [PATCH] drm/amdkfd: disable SVM for GC 10.1.3/4 Lang Yu
@ 2023-09-07 20:03 ` Felix Kuehling
  2023-09-08  2:27   ` Lang Yu
  0 siblings, 1 reply; 3+ messages in thread
From: Felix Kuehling @ 2023-09-07 20:03 UTC (permalink / raw)
  To: Lang Yu, amd-gfx; +Cc: Philip Yang

We need heavy-weight flushes not just for SVM. If this is broken it will 
affect ROCm either way.

Regards,
   Felix


On 2023-09-07 08:08, Lang Yu wrote:
> GC 10.1.3/4 have problems with TLB_FLUSH_HEAVYWEIGHT
> which is used by SVM in svm_range_unmap_from_gpus().
> This causes problems on GC 10.1.3/4.
>
> Signed-off-by: Lang Yu <Lang.Yu@amd.com>
> ---
>   drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 22 +++++++++++++++++-----
>   1 file changed, 17 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> index 7d82c7da223a..dd3db3d88d59 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> @@ -992,6 +992,22 @@ static const struct dev_pagemap_ops svm_migrate_pgmap_ops = {
>   /* Each VRAM page uses sizeof(struct page) on system memory */
>   #define SVM_HMM_PAGE_STRUCT_SIZE(size) ((size)/PAGE_SIZE * sizeof(struct page))
>   
> +static inline bool is_zone_device_needed(struct amdgpu_device *adev)
> +{
> +	/* Page migration works on gfx9 or newer */
> +	if (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 0, 1))
> +		return false;
> +
> +	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 3) ||
> +	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 4))
> +		return false;
> +
> +	if (adev->gmc.is_app_apu)
> +		return false;
> +
> +	return true;
> +}
> +
>   int kgd2kfd_init_zone_device(struct amdgpu_device *adev)
>   {
>   	struct amdgpu_kfd_dev *kfddev = &adev->kfd;
> @@ -1000,11 +1016,7 @@ int kgd2kfd_init_zone_device(struct amdgpu_device *adev)
>   	unsigned long size;
>   	void *r;
>   
> -	/* Page migration works on gfx9 or newer */
> -	if (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 0, 1))
> -		return -EINVAL;
> -
> -	if (adev->gmc.is_app_apu)
> +	if (!is_zone_device_needed(adev))
>   		return 0;
>   
>   	pgmap = &kfddev->pgmap;

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] drm/amdkfd: disable SVM for GC 10.1.3/4
  2023-09-07 20:03 ` Felix Kuehling
@ 2023-09-08  2:27   ` Lang Yu
  0 siblings, 0 replies; 3+ messages in thread
From: Lang Yu @ 2023-09-08  2:27 UTC (permalink / raw)
  To: Felix Kuehling; +Cc: Philip Yang, amd-gfx

On 2023-09-07, Felix Kuehling wrote:
> We need heavy-weight flushes not just for SVM. If this is broken it will
> affect ROCm either way.

Currently, TLB_FLUSH_HEAVYWEIGHT is used in two places:

1, kfd_ioctl_unmap_memory_from_gpu()

Under the following conditions:

KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2) ||
(KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 1) && dev->sdma_fw_version >= 18) ||
KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 0);

2, svm_range_unmap_from_gpus()

Unconditional.

That means the TLB_FLUSH_HEAVYWEIGHT defect won't affect ROCm on ASICs
other than the MI series, as long as we don't use SVM on them.

Regards,
Lang

> Regards,
>   Felix
> 
> 
> On 2023-09-07 08:08, Lang Yu wrote:
> > GC 10.1.3/4 have problems with TLB_FLUSH_HEAVYWEIGHT
> > which is used by SVM in svm_range_unmap_from_gpus().
> > This causes problems on GC 10.1.3/4.
> > 
> > Signed-off-by: Lang Yu <Lang.Yu@amd.com>
> > ---
> >   drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 22 +++++++++++++++++-----
> >   1 file changed, 17 insertions(+), 5 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> > index 7d82c7da223a..dd3db3d88d59 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> > @@ -992,6 +992,22 @@ static const struct dev_pagemap_ops svm_migrate_pgmap_ops = {
> >   /* Each VRAM page uses sizeof(struct page) on system memory */
> >   #define SVM_HMM_PAGE_STRUCT_SIZE(size) ((size)/PAGE_SIZE * sizeof(struct page))
> > +static inline bool is_zone_device_needed(struct amdgpu_device *adev)
> > +{
> > +	/* Page migration works on gfx9 or newer */
> > +	if (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 0, 1))
> > +		return false;
> > +
> > +	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 3) ||
> > +	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 4))
> > +		return false;
> > +
> > +	if (adev->gmc.is_app_apu)
> > +		return false;
> > +
> > +	return true;
> > +}
> > +
> >   int kgd2kfd_init_zone_device(struct amdgpu_device *adev)
> >   {
> >   	struct amdgpu_kfd_dev *kfddev = &adev->kfd;
> > @@ -1000,11 +1016,7 @@ int kgd2kfd_init_zone_device(struct amdgpu_device *adev)
> >   	unsigned long size;
> >   	void *r;
> > -	/* Page migration works on gfx9 or newer */
> > -	if (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 0, 1))
> > -		return -EINVAL;
> > -
> > -	if (adev->gmc.is_app_apu)
> > +	if (!is_zone_device_needed(adev))
> >   		return 0;
> >   	pgmap = &kfddev->pgmap;

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2023-09-08  2:27 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-09-07 12:08 [PATCH] drm/amdkfd: disable SVM for GC 10.1.3/4 Lang Yu
2023-09-07 20:03 ` Felix Kuehling
2023-09-08  2:27   ` Lang Yu

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.