Linux kernel -stable discussions
 help / color / mirror / Atom feed
* [PATCH] drm/amdgpu/discovery: fix fw based ip discovery
@ 2025-07-30 15:59 Alex Deucher
  2025-08-04 14:16 ` Alex Deucher
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Alex Deucher @ 2025-07-30 15:59 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alex Deucher, stable

We only need the fw based discovery table for sysfs.  No
need to parse it.  Additionally parsing some of the board
specific tables may result in incorrect data on some boards.
just load the binary and don't parse it on those boards.

Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4441
Fixes: 80a0e8282933 ("drm/amdgpu/discovery: optionally use fw based ip discovery")
Cc: stable@vger.kernel.org
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c    |  5 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 72 ++++++++++---------
 2 files changed, 41 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index efe98ffb679a4..b2538cff222ce 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2570,9 +2570,6 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
 
 	adev->firmware.gpu_info_fw = NULL;
 
-	if (adev->mman.discovery_bin)
-		return 0;
-
 	switch (adev->asic_type) {
 	default:
 		return 0;
@@ -2594,6 +2591,8 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
 		chip_name = "arcturus";
 		break;
 	case CHIP_NAVI12:
+		if (adev->mman.discovery_bin)
+			return 0;
 		chip_name = "navi12";
 		break;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 81b3443c8d7f4..27bd7659961e8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -2555,40 +2555,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
 
 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
-	case CHIP_VEGA12:
-	case CHIP_RAVEN:
-	case CHIP_VEGA20:
-	case CHIP_ARCTURUS:
-	case CHIP_ALDEBARAN:
-		/* this is not fatal.  We have a fallback below
-		 * if the new firmwares are not present. some of
-		 * this will be overridden below to keep things
-		 * consistent with the current behavior.
+		/* This is not fatal.  We only need the discovery
+		 * binary for sysfs.  We don't need it for a
+		 * functional system.
 		 */
-		r = amdgpu_discovery_reg_base_init(adev);
-		if (!r) {
-			amdgpu_discovery_harvest_ip(adev);
-			amdgpu_discovery_get_gfx_info(adev);
-			amdgpu_discovery_get_mall_info(adev);
-			amdgpu_discovery_get_vcn_info(adev);
-		}
-		break;
-	default:
-		r = amdgpu_discovery_reg_base_init(adev);
-		if (r) {
-			drm_err(&adev->ddev, "discovery failed: %d\n", r);
-			return r;
-		}
-
-		amdgpu_discovery_harvest_ip(adev);
-		amdgpu_discovery_get_gfx_info(adev);
-		amdgpu_discovery_get_mall_info(adev);
-		amdgpu_discovery_get_vcn_info(adev);
-		break;
-	}
-
-	switch (adev->asic_type) {
-	case CHIP_VEGA10:
+		amdgpu_discovery_init(adev);
 		vega10_reg_base_init(adev);
 		adev->sdma.num_instances = 2;
 		adev->gmc.num_umc = 4;
@@ -2611,6 +2582,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
 		adev->ip_versions[DCI_HWIP][0] = IP_VERSION(12, 0, 0);
 		break;
 	case CHIP_VEGA12:
+		/* This is not fatal.  We only need the discovery
+		 * binary for sysfs.  We don't need it for a
+		 * functional system.
+		 */
+		amdgpu_discovery_init(adev);
 		vega10_reg_base_init(adev);
 		adev->sdma.num_instances = 2;
 		adev->gmc.num_umc = 4;
@@ -2633,6 +2609,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
 		adev->ip_versions[DCI_HWIP][0] = IP_VERSION(12, 0, 1);
 		break;
 	case CHIP_RAVEN:
+		/* This is not fatal.  We only need the discovery
+		 * binary for sysfs.  We don't need it for a
+		 * functional system.
+		 */
+		amdgpu_discovery_init(adev);
 		vega10_reg_base_init(adev);
 		adev->sdma.num_instances = 1;
 		adev->vcn.num_vcn_inst = 1;
@@ -2674,6 +2655,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
 		}
 		break;
 	case CHIP_VEGA20:
+		/* This is not fatal.  We only need the discovery
+		 * binary for sysfs.  We don't need it for a
+		 * functional system.
+		 */
+		amdgpu_discovery_init(adev);
 		vega20_reg_base_init(adev);
 		adev->sdma.num_instances = 2;
 		adev->gmc.num_umc = 8;
@@ -2697,6 +2683,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
 		adev->ip_versions[DCI_HWIP][0] = IP_VERSION(12, 1, 0);
 		break;
 	case CHIP_ARCTURUS:
+		/* This is not fatal.  We only need the discovery
+		 * binary for sysfs.  We don't need it for a
+		 * functional system.
+		 */
+		amdgpu_discovery_init(adev);
 		arct_reg_base_init(adev);
 		adev->sdma.num_instances = 8;
 		adev->vcn.num_vcn_inst = 2;
@@ -2725,6 +2716,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
 		adev->ip_versions[UVD_HWIP][1] = IP_VERSION(2, 5, 0);
 		break;
 	case CHIP_ALDEBARAN:
+		/* This is not fatal.  We only need the discovery
+		 * binary for sysfs.  We don't need it for a
+		 * functional system.
+		 */
+		amdgpu_discovery_init(adev);
 		aldebaran_reg_base_init(adev);
 		adev->sdma.num_instances = 5;
 		adev->vcn.num_vcn_inst = 2;
@@ -2751,6 +2747,16 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
 		adev->ip_versions[XGMI_HWIP][0] = IP_VERSION(6, 1, 0);
 		break;
 	default:
+		r = amdgpu_discovery_reg_base_init(adev);
+		if (r) {
+			drm_err(&adev->ddev, "discovery failed: %d\n", r);
+			return r;
+		}
+
+		amdgpu_discovery_harvest_ip(adev);
+		amdgpu_discovery_get_gfx_info(adev);
+		amdgpu_discovery_get_mall_info(adev);
+		amdgpu_discovery_get_vcn_info(adev);
 		break;
 	}
 
-- 
2.50.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] drm/amdgpu/discovery: fix fw based ip discovery
  2025-07-30 15:59 [PATCH] drm/amdgpu/discovery: fix fw based ip discovery Alex Deucher
@ 2025-08-04 14:16 ` Alex Deucher
  2025-08-06 14:17   ` Alex Deucher
  2025-08-06 14:27 ` Mario Limonciello
  2025-08-06 15:17 ` Lazar, Lijo
  2 siblings, 1 reply; 5+ messages in thread
From: Alex Deucher @ 2025-08-04 14:16 UTC (permalink / raw)
  To: Alex Deucher; +Cc: amd-gfx, stable

Ping?

Alex

On Wed, Jul 30, 2025 at 12:18 PM Alex Deucher <alexander.deucher@amd.com> wrote:
>
> We only need the fw based discovery table for sysfs.  No
> need to parse it.  Additionally parsing some of the board
> specific tables may result in incorrect data on some boards.
> just load the binary and don't parse it on those boards.
>
> Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4441
> Fixes: 80a0e8282933 ("drm/amdgpu/discovery: optionally use fw based ip discovery")
> Cc: stable@vger.kernel.org
> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c    |  5 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 72 ++++++++++---------
>  2 files changed, 41 insertions(+), 36 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index efe98ffb679a4..b2538cff222ce 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2570,9 +2570,6 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
>
>         adev->firmware.gpu_info_fw = NULL;
>
> -       if (adev->mman.discovery_bin)
> -               return 0;
> -
>         switch (adev->asic_type) {
>         default:
>                 return 0;
> @@ -2594,6 +2591,8 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
>                 chip_name = "arcturus";
>                 break;
>         case CHIP_NAVI12:
> +               if (adev->mman.discovery_bin)
> +                       return 0;
>                 chip_name = "navi12";
>                 break;
>         }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
> index 81b3443c8d7f4..27bd7659961e8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
> @@ -2555,40 +2555,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
>
>         switch (adev->asic_type) {
>         case CHIP_VEGA10:
> -       case CHIP_VEGA12:
> -       case CHIP_RAVEN:
> -       case CHIP_VEGA20:
> -       case CHIP_ARCTURUS:
> -       case CHIP_ALDEBARAN:
> -               /* this is not fatal.  We have a fallback below
> -                * if the new firmwares are not present. some of
> -                * this will be overridden below to keep things
> -                * consistent with the current behavior.
> +               /* This is not fatal.  We only need the discovery
> +                * binary for sysfs.  We don't need it for a
> +                * functional system.
>                  */
> -               r = amdgpu_discovery_reg_base_init(adev);
> -               if (!r) {
> -                       amdgpu_discovery_harvest_ip(adev);
> -                       amdgpu_discovery_get_gfx_info(adev);
> -                       amdgpu_discovery_get_mall_info(adev);
> -                       amdgpu_discovery_get_vcn_info(adev);
> -               }
> -               break;
> -       default:
> -               r = amdgpu_discovery_reg_base_init(adev);
> -               if (r) {
> -                       drm_err(&adev->ddev, "discovery failed: %d\n", r);
> -                       return r;
> -               }
> -
> -               amdgpu_discovery_harvest_ip(adev);
> -               amdgpu_discovery_get_gfx_info(adev);
> -               amdgpu_discovery_get_mall_info(adev);
> -               amdgpu_discovery_get_vcn_info(adev);
> -               break;
> -       }
> -
> -       switch (adev->asic_type) {
> -       case CHIP_VEGA10:
> +               amdgpu_discovery_init(adev);
>                 vega10_reg_base_init(adev);
>                 adev->sdma.num_instances = 2;
>                 adev->gmc.num_umc = 4;
> @@ -2611,6 +2582,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
>                 adev->ip_versions[DCI_HWIP][0] = IP_VERSION(12, 0, 0);
>                 break;
>         case CHIP_VEGA12:
> +               /* This is not fatal.  We only need the discovery
> +                * binary for sysfs.  We don't need it for a
> +                * functional system.
> +                */
> +               amdgpu_discovery_init(adev);
>                 vega10_reg_base_init(adev);
>                 adev->sdma.num_instances = 2;
>                 adev->gmc.num_umc = 4;
> @@ -2633,6 +2609,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
>                 adev->ip_versions[DCI_HWIP][0] = IP_VERSION(12, 0, 1);
>                 break;
>         case CHIP_RAVEN:
> +               /* This is not fatal.  We only need the discovery
> +                * binary for sysfs.  We don't need it for a
> +                * functional system.
> +                */
> +               amdgpu_discovery_init(adev);
>                 vega10_reg_base_init(adev);
>                 adev->sdma.num_instances = 1;
>                 adev->vcn.num_vcn_inst = 1;
> @@ -2674,6 +2655,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
>                 }
>                 break;
>         case CHIP_VEGA20:
> +               /* This is not fatal.  We only need the discovery
> +                * binary for sysfs.  We don't need it for a
> +                * functional system.
> +                */
> +               amdgpu_discovery_init(adev);
>                 vega20_reg_base_init(adev);
>                 adev->sdma.num_instances = 2;
>                 adev->gmc.num_umc = 8;
> @@ -2697,6 +2683,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
>                 adev->ip_versions[DCI_HWIP][0] = IP_VERSION(12, 1, 0);
>                 break;
>         case CHIP_ARCTURUS:
> +               /* This is not fatal.  We only need the discovery
> +                * binary for sysfs.  We don't need it for a
> +                * functional system.
> +                */
> +               amdgpu_discovery_init(adev);
>                 arct_reg_base_init(adev);
>                 adev->sdma.num_instances = 8;
>                 adev->vcn.num_vcn_inst = 2;
> @@ -2725,6 +2716,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
>                 adev->ip_versions[UVD_HWIP][1] = IP_VERSION(2, 5, 0);
>                 break;
>         case CHIP_ALDEBARAN:
> +               /* This is not fatal.  We only need the discovery
> +                * binary for sysfs.  We don't need it for a
> +                * functional system.
> +                */
> +               amdgpu_discovery_init(adev);
>                 aldebaran_reg_base_init(adev);
>                 adev->sdma.num_instances = 5;
>                 adev->vcn.num_vcn_inst = 2;
> @@ -2751,6 +2747,16 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
>                 adev->ip_versions[XGMI_HWIP][0] = IP_VERSION(6, 1, 0);
>                 break;
>         default:
> +               r = amdgpu_discovery_reg_base_init(adev);
> +               if (r) {
> +                       drm_err(&adev->ddev, "discovery failed: %d\n", r);
> +                       return r;
> +               }
> +
> +               amdgpu_discovery_harvest_ip(adev);
> +               amdgpu_discovery_get_gfx_info(adev);
> +               amdgpu_discovery_get_mall_info(adev);
> +               amdgpu_discovery_get_vcn_info(adev);
>                 break;
>         }
>
> --
> 2.50.1
>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] drm/amdgpu/discovery: fix fw based ip discovery
  2025-08-04 14:16 ` Alex Deucher
@ 2025-08-06 14:17   ` Alex Deucher
  0 siblings, 0 replies; 5+ messages in thread
From: Alex Deucher @ 2025-08-06 14:17 UTC (permalink / raw)
  To: Alex Deucher; +Cc: amd-gfx, stable

Ping again?  This fixes a regression.

Alex

On Mon, Aug 4, 2025 at 10:16 AM Alex Deucher <alexdeucher@gmail.com> wrote:
>
> Ping?
>
> Alex
>
> On Wed, Jul 30, 2025 at 12:18 PM Alex Deucher <alexander.deucher@amd.com> wrote:
> >
> > We only need the fw based discovery table for sysfs.  No
> > need to parse it.  Additionally parsing some of the board
> > specific tables may result in incorrect data on some boards.
> > just load the binary and don't parse it on those boards.
> >
> > Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4441
> > Fixes: 80a0e8282933 ("drm/amdgpu/discovery: optionally use fw based ip discovery")
> > Cc: stable@vger.kernel.org
> > Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
> > ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c    |  5 +-
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 72 ++++++++++---------
> >  2 files changed, 41 insertions(+), 36 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > index efe98ffb679a4..b2538cff222ce 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > @@ -2570,9 +2570,6 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
> >
> >         adev->firmware.gpu_info_fw = NULL;
> >
> > -       if (adev->mman.discovery_bin)
> > -               return 0;
> > -
> >         switch (adev->asic_type) {
> >         default:
> >                 return 0;
> > @@ -2594,6 +2591,8 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
> >                 chip_name = "arcturus";
> >                 break;
> >         case CHIP_NAVI12:
> > +               if (adev->mman.discovery_bin)
> > +                       return 0;
> >                 chip_name = "navi12";
> >                 break;
> >         }
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
> > index 81b3443c8d7f4..27bd7659961e8 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
> > @@ -2555,40 +2555,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
> >
> >         switch (adev->asic_type) {
> >         case CHIP_VEGA10:
> > -       case CHIP_VEGA12:
> > -       case CHIP_RAVEN:
> > -       case CHIP_VEGA20:
> > -       case CHIP_ARCTURUS:
> > -       case CHIP_ALDEBARAN:
> > -               /* this is not fatal.  We have a fallback below
> > -                * if the new firmwares are not present. some of
> > -                * this will be overridden below to keep things
> > -                * consistent with the current behavior.
> > +               /* This is not fatal.  We only need the discovery
> > +                * binary for sysfs.  We don't need it for a
> > +                * functional system.
> >                  */
> > -               r = amdgpu_discovery_reg_base_init(adev);
> > -               if (!r) {
> > -                       amdgpu_discovery_harvest_ip(adev);
> > -                       amdgpu_discovery_get_gfx_info(adev);
> > -                       amdgpu_discovery_get_mall_info(adev);
> > -                       amdgpu_discovery_get_vcn_info(adev);
> > -               }
> > -               break;
> > -       default:
> > -               r = amdgpu_discovery_reg_base_init(adev);
> > -               if (r) {
> > -                       drm_err(&adev->ddev, "discovery failed: %d\n", r);
> > -                       return r;
> > -               }
> > -
> > -               amdgpu_discovery_harvest_ip(adev);
> > -               amdgpu_discovery_get_gfx_info(adev);
> > -               amdgpu_discovery_get_mall_info(adev);
> > -               amdgpu_discovery_get_vcn_info(adev);
> > -               break;
> > -       }
> > -
> > -       switch (adev->asic_type) {
> > -       case CHIP_VEGA10:
> > +               amdgpu_discovery_init(adev);
> >                 vega10_reg_base_init(adev);
> >                 adev->sdma.num_instances = 2;
> >                 adev->gmc.num_umc = 4;
> > @@ -2611,6 +2582,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
> >                 adev->ip_versions[DCI_HWIP][0] = IP_VERSION(12, 0, 0);
> >                 break;
> >         case CHIP_VEGA12:
> > +               /* This is not fatal.  We only need the discovery
> > +                * binary for sysfs.  We don't need it for a
> > +                * functional system.
> > +                */
> > +               amdgpu_discovery_init(adev);
> >                 vega10_reg_base_init(adev);
> >                 adev->sdma.num_instances = 2;
> >                 adev->gmc.num_umc = 4;
> > @@ -2633,6 +2609,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
> >                 adev->ip_versions[DCI_HWIP][0] = IP_VERSION(12, 0, 1);
> >                 break;
> >         case CHIP_RAVEN:
> > +               /* This is not fatal.  We only need the discovery
> > +                * binary for sysfs.  We don't need it for a
> > +                * functional system.
> > +                */
> > +               amdgpu_discovery_init(adev);
> >                 vega10_reg_base_init(adev);
> >                 adev->sdma.num_instances = 1;
> >                 adev->vcn.num_vcn_inst = 1;
> > @@ -2674,6 +2655,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
> >                 }
> >                 break;
> >         case CHIP_VEGA20:
> > +               /* This is not fatal.  We only need the discovery
> > +                * binary for sysfs.  We don't need it for a
> > +                * functional system.
> > +                */
> > +               amdgpu_discovery_init(adev);
> >                 vega20_reg_base_init(adev);
> >                 adev->sdma.num_instances = 2;
> >                 adev->gmc.num_umc = 8;
> > @@ -2697,6 +2683,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
> >                 adev->ip_versions[DCI_HWIP][0] = IP_VERSION(12, 1, 0);
> >                 break;
> >         case CHIP_ARCTURUS:
> > +               /* This is not fatal.  We only need the discovery
> > +                * binary for sysfs.  We don't need it for a
> > +                * functional system.
> > +                */
> > +               amdgpu_discovery_init(adev);
> >                 arct_reg_base_init(adev);
> >                 adev->sdma.num_instances = 8;
> >                 adev->vcn.num_vcn_inst = 2;
> > @@ -2725,6 +2716,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
> >                 adev->ip_versions[UVD_HWIP][1] = IP_VERSION(2, 5, 0);
> >                 break;
> >         case CHIP_ALDEBARAN:
> > +               /* This is not fatal.  We only need the discovery
> > +                * binary for sysfs.  We don't need it for a
> > +                * functional system.
> > +                */
> > +               amdgpu_discovery_init(adev);
> >                 aldebaran_reg_base_init(adev);
> >                 adev->sdma.num_instances = 5;
> >                 adev->vcn.num_vcn_inst = 2;
> > @@ -2751,6 +2747,16 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
> >                 adev->ip_versions[XGMI_HWIP][0] = IP_VERSION(6, 1, 0);
> >                 break;
> >         default:
> > +               r = amdgpu_discovery_reg_base_init(adev);
> > +               if (r) {
> > +                       drm_err(&adev->ddev, "discovery failed: %d\n", r);
> > +                       return r;
> > +               }
> > +
> > +               amdgpu_discovery_harvest_ip(adev);
> > +               amdgpu_discovery_get_gfx_info(adev);
> > +               amdgpu_discovery_get_mall_info(adev);
> > +               amdgpu_discovery_get_vcn_info(adev);
> >                 break;
> >         }
> >
> > --
> > 2.50.1
> >

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] drm/amdgpu/discovery: fix fw based ip discovery
  2025-07-30 15:59 [PATCH] drm/amdgpu/discovery: fix fw based ip discovery Alex Deucher
  2025-08-04 14:16 ` Alex Deucher
@ 2025-08-06 14:27 ` Mario Limonciello
  2025-08-06 15:17 ` Lazar, Lijo
  2 siblings, 0 replies; 5+ messages in thread
From: Mario Limonciello @ 2025-08-06 14:27 UTC (permalink / raw)
  To: Alex Deucher, amd-gfx; +Cc: stable

On 7/30/2025 10:59 AM, Alex Deucher wrote:
> We only need the fw based discovery table for sysfs.  No
> need to parse it.  Additionally parsing some of the board
> specific tables may result in incorrect data on some boards.
> just load the binary and don't parse it on those boards.
> 
> Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4441
> Fixes: 80a0e8282933 ("drm/amdgpu/discovery: optionally use fw based ip discovery")
> Cc: stable@vger.kernel.org
> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c    |  5 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 72 ++++++++++---------
>   2 files changed, 41 insertions(+), 36 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index efe98ffb679a4..b2538cff222ce 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2570,9 +2570,6 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
>   
>   	adev->firmware.gpu_info_fw = NULL;
>   
> -	if (adev->mman.discovery_bin)
> -		return 0;
> -
>   	switch (adev->asic_type) {
>   	default:
>   		return 0;
> @@ -2594,6 +2591,8 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
>   		chip_name = "arcturus";
>   		break;
>   	case CHIP_NAVI12:
> +		if (adev->mman.discovery_bin)
> +			return 0;
>   		chip_name = "navi12";
>   		break;
>   	}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
> index 81b3443c8d7f4..27bd7659961e8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
> @@ -2555,40 +2555,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
>   
>   	switch (adev->asic_type) {
>   	case CHIP_VEGA10:
> -	case CHIP_VEGA12:
> -	case CHIP_RAVEN:
> -	case CHIP_VEGA20:
> -	case CHIP_ARCTURUS:
> -	case CHIP_ALDEBARAN:
> -		/* this is not fatal.  We have a fallback below
> -		 * if the new firmwares are not present. some of
> -		 * this will be overridden below to keep things
> -		 * consistent with the current behavior.
> +		/* This is not fatal.  We only need the discovery
> +		 * binary for sysfs.  We don't need it for a
> +		 * functional system.
>   		 */
> -		r = amdgpu_discovery_reg_base_init(adev);
> -		if (!r) {
> -			amdgpu_discovery_harvest_ip(adev);
> -			amdgpu_discovery_get_gfx_info(adev);
> -			amdgpu_discovery_get_mall_info(adev);
> -			amdgpu_discovery_get_vcn_info(adev);
> -		}
> -		break;
> -	default:
> -		r = amdgpu_discovery_reg_base_init(adev);
> -		if (r) {
> -			drm_err(&adev->ddev, "discovery failed: %d\n", r);
> -			return r;
> -		}
> -
> -		amdgpu_discovery_harvest_ip(adev);
> -		amdgpu_discovery_get_gfx_info(adev);
> -		amdgpu_discovery_get_mall_info(adev);
> -		amdgpu_discovery_get_vcn_info(adev);
> -		break;
> -	}
> -
> -	switch (adev->asic_type) {
> -	case CHIP_VEGA10:
> +		amdgpu_discovery_init(adev);
>   		vega10_reg_base_init(adev);
>   		adev->sdma.num_instances = 2;
>   		adev->gmc.num_umc = 4;
> @@ -2611,6 +2582,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
>   		adev->ip_versions[DCI_HWIP][0] = IP_VERSION(12, 0, 0);
>   		break;
>   	case CHIP_VEGA12:
> +		/* This is not fatal.  We only need the discovery
> +		 * binary for sysfs.  We don't need it for a
> +		 * functional system.
> +		 */
> +		amdgpu_discovery_init(adev);
>   		vega10_reg_base_init(adev);
>   		adev->sdma.num_instances = 2;
>   		adev->gmc.num_umc = 4;
> @@ -2633,6 +2609,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
>   		adev->ip_versions[DCI_HWIP][0] = IP_VERSION(12, 0, 1);
>   		break;
>   	case CHIP_RAVEN:
> +		/* This is not fatal.  We only need the discovery
> +		 * binary for sysfs.  We don't need it for a
> +		 * functional system.
> +		 */
> +		amdgpu_discovery_init(adev);
>   		vega10_reg_base_init(adev);
>   		adev->sdma.num_instances = 1;
>   		adev->vcn.num_vcn_inst = 1;
> @@ -2674,6 +2655,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
>   		}
>   		break;
>   	case CHIP_VEGA20:
> +		/* This is not fatal.  We only need the discovery
> +		 * binary for sysfs.  We don't need it for a
> +		 * functional system.
> +		 */
> +		amdgpu_discovery_init(adev);
>   		vega20_reg_base_init(adev);
>   		adev->sdma.num_instances = 2;
>   		adev->gmc.num_umc = 8;
> @@ -2697,6 +2683,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
>   		adev->ip_versions[DCI_HWIP][0] = IP_VERSION(12, 1, 0);
>   		break;
>   	case CHIP_ARCTURUS:
> +		/* This is not fatal.  We only need the discovery
> +		 * binary for sysfs.  We don't need it for a
> +		 * functional system.
> +		 */
> +		amdgpu_discovery_init(adev);
>   		arct_reg_base_init(adev);
>   		adev->sdma.num_instances = 8;
>   		adev->vcn.num_vcn_inst = 2;
> @@ -2725,6 +2716,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
>   		adev->ip_versions[UVD_HWIP][1] = IP_VERSION(2, 5, 0);
>   		break;
>   	case CHIP_ALDEBARAN:
> +		/* This is not fatal.  We only need the discovery
> +		 * binary for sysfs.  We don't need it for a
> +		 * functional system.
> +		 */
> +		amdgpu_discovery_init(adev);
>   		aldebaran_reg_base_init(adev);
>   		adev->sdma.num_instances = 5;
>   		adev->vcn.num_vcn_inst = 2;
> @@ -2751,6 +2747,16 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
>   		adev->ip_versions[XGMI_HWIP][0] = IP_VERSION(6, 1, 0);
>   		break;
>   	default:
> +		r = amdgpu_discovery_reg_base_init(adev);
> +		if (r) {
> +			drm_err(&adev->ddev, "discovery failed: %d\n", r);
> +			return r;
> +		}
> +
> +		amdgpu_discovery_harvest_ip(adev);
> +		amdgpu_discovery_get_gfx_info(adev);
> +		amdgpu_discovery_get_mall_info(adev);
> +		amdgpu_discovery_get_vcn_info(adev);
>   		break;
>   	}
>   


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] drm/amdgpu/discovery: fix fw based ip discovery
  2025-07-30 15:59 [PATCH] drm/amdgpu/discovery: fix fw based ip discovery Alex Deucher
  2025-08-04 14:16 ` Alex Deucher
  2025-08-06 14:27 ` Mario Limonciello
@ 2025-08-06 15:17 ` Lazar, Lijo
  2 siblings, 0 replies; 5+ messages in thread
From: Lazar, Lijo @ 2025-08-06 15:17 UTC (permalink / raw)
  To: Alex Deucher, amd-gfx; +Cc: stable



On 7/30/2025 9:29 PM, Alex Deucher wrote:
> We only need the fw based discovery table for sysfs.  No
> need to parse it.  Additionally parsing some of the board
> specific tables may result in incorrect data on some boards.
> just load the binary and don't parse it on those boards.
> 
> Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4441
> Fixes: 80a0e8282933 ("drm/amdgpu/discovery: optionally use fw based ip discovery")
> Cc: stable@vger.kernel.org
> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

One generic question - if discovery content is completely ignored by
driver, how external tool using sysfs could consume the data? Wouldn't
there be a mismatch in the config?

Thanks,
Lijo

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c    |  5 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 72 ++++++++++---------
>  2 files changed, 41 insertions(+), 36 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index efe98ffb679a4..b2538cff222ce 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2570,9 +2570,6 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
>  
>  	adev->firmware.gpu_info_fw = NULL;
>  
> -	if (adev->mman.discovery_bin)
> -		return 0;
> -
>  	switch (adev->asic_type) {
>  	default:
>  		return 0;
> @@ -2594,6 +2591,8 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
>  		chip_name = "arcturus";
>  		break;
>  	case CHIP_NAVI12:
> +		if (adev->mman.discovery_bin)
> +			return 0;
>  		chip_name = "navi12";
>  		break;
>  	}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
> index 81b3443c8d7f4..27bd7659961e8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
> @@ -2555,40 +2555,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
>  
>  	switch (adev->asic_type) {
>  	case CHIP_VEGA10:
> -	case CHIP_VEGA12:
> -	case CHIP_RAVEN:
> -	case CHIP_VEGA20:
> -	case CHIP_ARCTURUS:
> -	case CHIP_ALDEBARAN:
> -		/* this is not fatal.  We have a fallback below
> -		 * if the new firmwares are not present. some of
> -		 * this will be overridden below to keep things
> -		 * consistent with the current behavior.
> +		/* This is not fatal.  We only need the discovery
> +		 * binary for sysfs.  We don't need it for a
> +		 * functional system.
>  		 */
> -		r = amdgpu_discovery_reg_base_init(adev);
> -		if (!r) {
> -			amdgpu_discovery_harvest_ip(adev);
> -			amdgpu_discovery_get_gfx_info(adev);
> -			amdgpu_discovery_get_mall_info(adev);
> -			amdgpu_discovery_get_vcn_info(adev);
> -		}
> -		break;
> -	default:
> -		r = amdgpu_discovery_reg_base_init(adev);
> -		if (r) {
> -			drm_err(&adev->ddev, "discovery failed: %d\n", r);
> -			return r;
> -		}
> -
> -		amdgpu_discovery_harvest_ip(adev);
> -		amdgpu_discovery_get_gfx_info(adev);
> -		amdgpu_discovery_get_mall_info(adev);
> -		amdgpu_discovery_get_vcn_info(adev);
> -		break;
> -	}
> -
> -	switch (adev->asic_type) {
> -	case CHIP_VEGA10:
> +		amdgpu_discovery_init(adev);
>  		vega10_reg_base_init(adev);
>  		adev->sdma.num_instances = 2;
>  		adev->gmc.num_umc = 4;
> @@ -2611,6 +2582,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
>  		adev->ip_versions[DCI_HWIP][0] = IP_VERSION(12, 0, 0);
>  		break;
>  	case CHIP_VEGA12:
> +		/* This is not fatal.  We only need the discovery
> +		 * binary for sysfs.  We don't need it for a
> +		 * functional system.
> +		 */
> +		amdgpu_discovery_init(adev);
>  		vega10_reg_base_init(adev);
>  		adev->sdma.num_instances = 2;
>  		adev->gmc.num_umc = 4;
> @@ -2633,6 +2609,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
>  		adev->ip_versions[DCI_HWIP][0] = IP_VERSION(12, 0, 1);
>  		break;
>  	case CHIP_RAVEN:
> +		/* This is not fatal.  We only need the discovery
> +		 * binary for sysfs.  We don't need it for a
> +		 * functional system.
> +		 */
> +		amdgpu_discovery_init(adev);
>  		vega10_reg_base_init(adev);
>  		adev->sdma.num_instances = 1;
>  		adev->vcn.num_vcn_inst = 1;
> @@ -2674,6 +2655,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
>  		}
>  		break;
>  	case CHIP_VEGA20:
> +		/* This is not fatal.  We only need the discovery
> +		 * binary for sysfs.  We don't need it for a
> +		 * functional system.
> +		 */
> +		amdgpu_discovery_init(adev);
>  		vega20_reg_base_init(adev);
>  		adev->sdma.num_instances = 2;
>  		adev->gmc.num_umc = 8;
> @@ -2697,6 +2683,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
>  		adev->ip_versions[DCI_HWIP][0] = IP_VERSION(12, 1, 0);
>  		break;
>  	case CHIP_ARCTURUS:
> +		/* This is not fatal.  We only need the discovery
> +		 * binary for sysfs.  We don't need it for a
> +		 * functional system.
> +		 */
> +		amdgpu_discovery_init(adev);
>  		arct_reg_base_init(adev);
>  		adev->sdma.num_instances = 8;
>  		adev->vcn.num_vcn_inst = 2;
> @@ -2725,6 +2716,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
>  		adev->ip_versions[UVD_HWIP][1] = IP_VERSION(2, 5, 0);
>  		break;
>  	case CHIP_ALDEBARAN:
> +		/* This is not fatal.  We only need the discovery
> +		 * binary for sysfs.  We don't need it for a
> +		 * functional system.
> +		 */
> +		amdgpu_discovery_init(adev);
>  		aldebaran_reg_base_init(adev);
>  		adev->sdma.num_instances = 5;
>  		adev->vcn.num_vcn_inst = 2;
> @@ -2751,6 +2747,16 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
>  		adev->ip_versions[XGMI_HWIP][0] = IP_VERSION(6, 1, 0);
>  		break;
>  	default:
> +		r = amdgpu_discovery_reg_base_init(adev);
> +		if (r) {
> +			drm_err(&adev->ddev, "discovery failed: %d\n", r);
> +			return r;
> +		}
> +
> +		amdgpu_discovery_harvest_ip(adev);
> +		amdgpu_discovery_get_gfx_info(adev);
> +		amdgpu_discovery_get_mall_info(adev);
> +		amdgpu_discovery_get_vcn_info(adev);
>  		break;
>  	}
>  


^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2025-08-06 15:17 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-07-30 15:59 [PATCH] drm/amdgpu/discovery: fix fw based ip discovery Alex Deucher
2025-08-04 14:16 ` Alex Deucher
2025-08-06 14:17   ` Alex Deucher
2025-08-06 14:27 ` Mario Limonciello
2025-08-06 15:17 ` Lazar, Lijo

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox