* [PATCH RFC RFT] drm/msm: adreno: attach the GMU device to a driver
@ 2025-10-22 12:44 Neil Armstrong
2025-10-22 17:09 ` Konrad Dybcio
` (2 more replies)
0 siblings, 3 replies; 13+ messages in thread
From: Neil Armstrong @ 2025-10-22 12:44 UTC (permalink / raw)
To: Rob Clark, Sean Paul, Konrad Dybcio, Dmitry Baryshkov,
Abhinav Kumar, Jessica Zhang, Marijn Suijten, David Airlie,
Simona Vetter
Cc: linux-arm-msm, dri-devel, freedreno, linux-kernel, Neil Armstrong
Since sync_state is enabled by default in pmdomain & CCF as of v6.17,
the GCC and GPUCC sync_state would stay pending, leaving the resources in
full performance:
gcc-x1e80100 100000.clock-controller: sync_state() pending due to 3d6a000.gmu
gpucc-x1e80100 3d90000.clock-controller: sync_state() pending due to 3d6a000.gmu
In order to fix this state and allow the GMU to be properly
probed, let's add a proper driver for the GMU and add it to
the MSM driver components.
Only the proper GMU has been tested since I don't have
access to hardware with a GMU wrapper.
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
---
drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 354 ++++++++++++++---------------
drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 6 -
drivers/gpu/drm/msm/adreno/a6xx_gpu.h | 3 -
drivers/gpu/drm/msm/adreno/adreno_device.c | 4 +
drivers/gpu/drm/msm/adreno/adreno_gpu.h | 4 +
drivers/gpu/drm/msm/msm_drv.c | 16 +-
6 files changed, 192 insertions(+), 195 deletions(-)
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index fc62fef2fed8..6e7c3e627509 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -1859,11 +1859,14 @@ void a6xx_gmu_sysprof_setup(struct msm_gpu *gpu)
pm_runtime_put(&gpu->pdev->dev);
}
-void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu)
+static void a6xx_gmu_unbind(struct device *dev, struct device *master, void *data)
{
- struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
+ struct platform_device *pdev = to_platform_device(dev);
+ struct msm_drm_private *priv = dev_get_drvdata(master);
+ struct msm_gpu *gpu = priv->gpu;
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
- struct platform_device *pdev = to_platform_device(gmu->dev);
mutex_lock(&gmu->lock);
if (!gmu->initialized) {
@@ -1903,9 +1906,6 @@ void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu)
free_irq(gmu->gmu_irq, gmu);
free_irq(gmu->hfi_irq, gmu);
}
-
- /* Drop reference taken in of_find_device_by_node */
- put_device(gmu->dev);
}
static int cxpd_notifier_cb(struct notifier_block *nb,
@@ -1919,169 +1919,130 @@ static int cxpd_notifier_cb(struct notifier_block *nb,
return 0;
}
-int a6xx_gmu_wrapper_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node)
+static int a6xx_gmu_bind(struct device *dev, struct device *master, void *data)
{
- struct platform_device *pdev = of_find_device_by_node(node);
- struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
- int ret;
-
- if (!pdev)
- return -ENODEV;
-
- gmu->dev = &pdev->dev;
-
- ret = of_dma_configure(gmu->dev, node, true);
- if (ret)
- return ret;
-
- pm_runtime_enable(gmu->dev);
-
- /* Mark legacy for manual SPTPRAC control */
- gmu->legacy = true;
-
- /* Map the GMU registers */
- gmu->mmio = a6xx_gmu_get_mmio(pdev, "gmu");
- if (IS_ERR(gmu->mmio)) {
- ret = PTR_ERR(gmu->mmio);
- goto err_mmio;
- }
-
- gmu->cxpd = dev_pm_domain_attach_by_name(gmu->dev, "cx");
- if (IS_ERR(gmu->cxpd)) {
- ret = PTR_ERR(gmu->cxpd);
- goto err_mmio;
- }
-
- if (!device_link_add(gmu->dev, gmu->cxpd, DL_FLAG_PM_RUNTIME)) {
- ret = -ENODEV;
- goto detach_cxpd;
- }
-
- init_completion(&gmu->pd_gate);
- complete_all(&gmu->pd_gate);
- gmu->pd_nb.notifier_call = cxpd_notifier_cb;
-
- /* Get a link to the GX power domain to reset the GPU */
- gmu->gxpd = dev_pm_domain_attach_by_name(gmu->dev, "gx");
- if (IS_ERR(gmu->gxpd)) {
- ret = PTR_ERR(gmu->gxpd);
- goto err_mmio;
- }
-
- gmu->initialized = true;
-
- return 0;
-
-detach_cxpd:
- dev_pm_domain_detach(gmu->cxpd, false);
-
-err_mmio:
- iounmap(gmu->mmio);
-
- /* Drop reference taken in of_find_device_by_node */
- put_device(gmu->dev);
-
- return ret;
-}
-
-int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node)
-{
- struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
+ struct platform_device *pdev = to_platform_device(dev);
+ struct msm_drm_private *priv = dev_get_drvdata(master);
+ struct msm_gpu *gpu = dev_to_gpu(&priv->gpu_pdev->dev);
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
- struct platform_device *pdev = of_find_device_by_node(node);
struct device_link *link;
int ret;
- if (!pdev)
- return -ENODEV;
-
- gmu->dev = &pdev->dev;
+ gmu->dev = dev;
- ret = of_dma_configure(gmu->dev, node, true);
+ ret = of_dma_configure(gmu->dev, dev->of_node, true);
if (ret)
return ret;
- /* Set GMU idle level */
- gmu->idle_level = (adreno_gpu->info->quirks & ADRENO_QUIRK_IFPC) ?
- GMU_IDLE_STATE_IFPC : GMU_IDLE_STATE_ACTIVE;
+ if (adreno_has_gmu_wrapper(adreno_gpu))
+ /* Mark legacy for manual SPTPRAC control */
+ gmu->legacy = true;
+
+ if (!gmu->legacy)
+ /* Set GMU idle level */
+ gmu->idle_level = (adreno_gpu->info->quirks & ADRENO_QUIRK_IFPC) ?
+ GMU_IDLE_STATE_IFPC : GMU_IDLE_STATE_ACTIVE;
pm_runtime_enable(gmu->dev);
- /* Get the list of clocks */
- ret = a6xx_gmu_clocks_probe(gmu);
- if (ret)
- goto err_put_device;
+ if (!gmu->legacy) {
+ /* Get the list of clocks */
+ ret = a6xx_gmu_clocks_probe(gmu);
+ if (ret)
+ return ret;
- ret = a6xx_gmu_memory_probe(adreno_gpu->base.dev, gmu);
- if (ret)
- goto err_put_device;
+ ret = a6xx_gmu_memory_probe(adreno_gpu->base.dev, gmu);
+ if (ret)
+ return ret;
+ /* A660 now requires handling "prealloc requests" in GMU firmware
+ * For now just hardcode allocations based on the known firmware.
+ * note: there is no indication that these correspond to "dummy" or
+ * "debug" regions, but this "guess" allows reusing these BOs which
+ * are otherwise unused by a660.
+ */
+ gmu->dummy.size = SZ_4K;
+ if (adreno_is_a660_family(adreno_gpu) ||
+ adreno_is_a7xx(adreno_gpu)) {
+ ret = a6xx_gmu_memory_alloc(gmu, &gmu->debug, SZ_4K * 7,
+ 0x60400000, "debug");
+ if (ret)
+ goto err_memory;
+
+ gmu->dummy.size = SZ_8K;
+ }
- /* A660 now requires handling "prealloc requests" in GMU firmware
- * For now just hardcode allocations based on the known firmware.
- * note: there is no indication that these correspond to "dummy" or
- * "debug" regions, but this "guess" allows reusing these BOs which
- * are otherwise unused by a660.
- */
- gmu->dummy.size = SZ_4K;
- if (adreno_is_a660_family(adreno_gpu) ||
- adreno_is_a7xx(adreno_gpu)) {
- ret = a6xx_gmu_memory_alloc(gmu, &gmu->debug, SZ_4K * 7,
- 0x60400000, "debug");
+ /* Allocate memory for the GMU dummy page */
+ ret = a6xx_gmu_memory_alloc(gmu, &gmu->dummy, gmu->dummy.size,
+ 0x60000000, "dummy");
if (ret)
goto err_memory;
- gmu->dummy.size = SZ_8K;
- }
-
- /* Allocate memory for the GMU dummy page */
- ret = a6xx_gmu_memory_alloc(gmu, &gmu->dummy, gmu->dummy.size,
- 0x60000000, "dummy");
- if (ret)
- goto err_memory;
+ /* Note that a650 family also includes a660 family: */
+ if (adreno_is_a650_family(adreno_gpu) ||
+ adreno_is_a7xx(adreno_gpu)) {
+ ret = a6xx_gmu_memory_alloc(gmu, &gmu->icache,
+ SZ_16M - SZ_16K, 0x04000, "icache");
+ if (ret)
+ goto err_memory;
+ /*
+ * NOTE: when porting legacy ("pre-650-family") GPUs you may be tempted to add a condition
+ * to allocate icache/dcache here, as per downstream code flow, but it may not actually be
+ * necessary. If you omit this step and you don't get random pagefaults, you are likely
+ * good to go without this!
+ */
+ } else if (adreno_is_a640_family(adreno_gpu)) {
+ ret = a6xx_gmu_memory_alloc(gmu, &gmu->icache,
+ SZ_256K - SZ_16K, 0x04000, "icache");
+ if (ret)
+ goto err_memory;
+
+ ret = a6xx_gmu_memory_alloc(gmu, &gmu->dcache,
+ SZ_256K - SZ_16K, 0x44000, "dcache");
+ if (ret)
+ goto err_memory;
+ } else if (adreno_is_a630_family(adreno_gpu)) {
+ /* HFI v1, has sptprac */
+ gmu->legacy = true;
+
+ /* Allocate memory for the GMU debug region */
+ ret = a6xx_gmu_memory_alloc(gmu, &gmu->debug, SZ_16K, 0, "debug");
+ if (ret)
+ goto err_memory;
+ }
- /* Note that a650 family also includes a660 family: */
- if (adreno_is_a650_family(adreno_gpu) ||
- adreno_is_a7xx(adreno_gpu)) {
- ret = a6xx_gmu_memory_alloc(gmu, &gmu->icache,
- SZ_16M - SZ_16K, 0x04000, "icache");
- if (ret)
- goto err_memory;
- /*
- * NOTE: when porting legacy ("pre-650-family") GPUs you may be tempted to add a condition
- * to allocate icache/dcache here, as per downstream code flow, but it may not actually be
- * necessary. If you omit this step and you don't get random pagefaults, you are likely
- * good to go without this!
- */
- } else if (adreno_is_a640_family(adreno_gpu)) {
- ret = a6xx_gmu_memory_alloc(gmu, &gmu->icache,
- SZ_256K - SZ_16K, 0x04000, "icache");
+ /* Allocate memory for the GMU log region */
+ ret = a6xx_gmu_memory_alloc(gmu, &gmu->log, SZ_16K, 0, "log");
if (ret)
goto err_memory;
- ret = a6xx_gmu_memory_alloc(gmu, &gmu->dcache,
- SZ_256K - SZ_16K, 0x44000, "dcache");
+ /* Allocate memory for the HFI queues */
+ ret = a6xx_gmu_memory_alloc(gmu, &gmu->hfi, SZ_16K, 0, "hfi");
if (ret)
goto err_memory;
- } else if (adreno_is_a630_family(adreno_gpu)) {
- /* HFI v1, has sptprac */
- gmu->legacy = true;
- /* Allocate memory for the GMU debug region */
- ret = a6xx_gmu_memory_alloc(gmu, &gmu->debug, SZ_16K, 0, "debug");
- if (ret)
- goto err_memory;
- }
+ if (adreno_is_a650_family(adreno_gpu) ||
+ adreno_is_a7xx(adreno_gpu)) {
+ gmu->rscc = a6xx_gmu_get_mmio(pdev, "rscc");
+ if (IS_ERR(gmu->rscc)) {
+ ret = -ENODEV;
+ goto err_mmio;
+ }
+ } else {
+ gmu->rscc = gmu->mmio + 0x23000;
+ }
- /* Allocate memory for the GMU log region */
- ret = a6xx_gmu_memory_alloc(gmu, &gmu->log, SZ_16K, 0, "log");
- if (ret)
- goto err_memory;
+ /* Get the HFI and GMU interrupts */
+ gmu->hfi_irq = a6xx_gmu_get_irq(gmu, pdev, "hfi", a6xx_hfi_irq);
+ gmu->gmu_irq = a6xx_gmu_get_irq(gmu, pdev, "gmu", a6xx_gmu_irq);
- /* Allocate memory for for the HFI queues */
- ret = a6xx_gmu_memory_alloc(gmu, &gmu->hfi, SZ_16K, 0, "hfi");
- if (ret)
- goto err_memory;
+ if (gmu->hfi_irq < 0 || gmu->gmu_irq < 0) {
+ ret = -ENODEV;
+ goto err_mmio;
+ }
+ }
/* Map the GMU registers */
gmu->mmio = a6xx_gmu_get_mmio(pdev, "gmu");
@@ -2090,26 +2051,6 @@ int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node)
goto err_memory;
}
- if (adreno_is_a650_family(adreno_gpu) ||
- adreno_is_a7xx(adreno_gpu)) {
- gmu->rscc = a6xx_gmu_get_mmio(pdev, "rscc");
- if (IS_ERR(gmu->rscc)) {
- ret = -ENODEV;
- goto err_mmio;
- }
- } else {
- gmu->rscc = gmu->mmio + 0x23000;
- }
-
- /* Get the HFI and GMU interrupts */
- gmu->hfi_irq = a6xx_gmu_get_irq(gmu, pdev, "hfi", a6xx_hfi_irq);
- gmu->gmu_irq = a6xx_gmu_get_irq(gmu, pdev, "gmu", a6xx_gmu_irq);
-
- if (gmu->hfi_irq < 0 || gmu->gmu_irq < 0) {
- ret = -ENODEV;
- goto err_mmio;
- }
-
gmu->cxpd = dev_pm_domain_attach_by_name(gmu->dev, "cx");
if (IS_ERR(gmu->cxpd)) {
ret = PTR_ERR(gmu->cxpd);
@@ -2122,11 +2063,13 @@ int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node)
goto detach_cxpd;
}
- /* Other errors are handled during GPU ACD probe */
- gmu->qmp = qmp_get(gmu->dev);
- if (PTR_ERR_OR_ZERO(gmu->qmp) == -EPROBE_DEFER) {
- ret = -EPROBE_DEFER;
- goto detach_gxpd;
+ if (!gmu->legacy) {
+ /* Other errors are handled during GPU ACD probe */
+ gmu->qmp = qmp_get(gmu->dev);
+ if (PTR_ERR_OR_ZERO(gmu->qmp) == -EPROBE_DEFER) {
+ ret = -EPROBE_DEFER;
+ goto detach_gxpd;
+ }
}
init_completion(&gmu->pd_gate);
@@ -2139,18 +2082,20 @@ int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node)
*/
gmu->gxpd = dev_pm_domain_attach_by_name(gmu->dev, "gx");
- /* Get the power levels for the GMU and GPU */
- a6xx_gmu_pwrlevels_probe(gmu);
+ if (!gmu->legacy) {
+ /* Get the power levels for the GMU and GPU */
+ a6xx_gmu_pwrlevels_probe(gmu);
- ret = a6xx_gmu_acd_probe(gmu);
- if (ret)
- goto detach_gxpd;
+ ret = a6xx_gmu_acd_probe(gmu);
+ if (ret)
+ goto detach_gxpd;
- /* Set up the HFI queues */
- a6xx_hfi_init(gmu);
+ /* Set up the HFI queues */
+ a6xx_hfi_init(gmu);
- /* Initialize RPMh */
- a6xx_gmu_rpmh_init(gmu);
+ /* Initialize RPMh */
+ a6xx_gmu_rpmh_init(gmu);
+ }
gmu->initialized = true;
@@ -2170,16 +2115,57 @@ int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node)
err_mmio:
iounmap(gmu->mmio);
- if (platform_get_resource_byname(pdev, IORESOURCE_MEM, "rscc"))
- iounmap(gmu->rscc);
- free_irq(gmu->gmu_irq, gmu);
- free_irq(gmu->hfi_irq, gmu);
+ if (!gmu->legacy) {
+ if (platform_get_resource_byname(pdev, IORESOURCE_MEM, "rscc"))
+ iounmap(gmu->rscc);
+ free_irq(gmu->gmu_irq, gmu);
+ free_irq(gmu->hfi_irq, gmu);
+ }
err_memory:
- a6xx_gmu_memory_free(gmu);
-err_put_device:
- /* Drop reference taken in of_find_device_by_node */
- put_device(gmu->dev);
+ if (!gmu->legacy)
+ a6xx_gmu_memory_free(gmu);
return ret;
}
+
+static const struct component_ops a6xx_gmu_ops = {
+ .bind = a6xx_gmu_bind,
+ .unbind = a6xx_gmu_unbind,
+};
+
+static int a6xx_gmu_probe(struct platform_device *pdev)
+{
+ return component_add(&pdev->dev, &a6xx_gmu_ops);
+}
+
+static void a6xx_gmu_remove(struct platform_device *pdev)
+{
+
+ component_del(&pdev->dev, &a6xx_gmu_ops);
+}
+
+static const struct of_device_id dt_match[] = {
+ { .compatible = "qcom,adreno-gmu" },
+ { .compatible = "qcom,adreno-gmu-wrapper" },
+ {}
+};
+
+static struct platform_driver a6xx_gmu_drv = {
+ .probe = a6xx_gmu_probe,
+ .remove = a6xx_gmu_remove,
+ .driver = {
+ .name = "a6xx_gmu",
+ .of_match_table = dt_match,
+ },
+};
+
+void __init a6xx_gmu_register(void)
+{
+ platform_driver_register(&a6xx_gmu_drv);
+}
+
+void __exit a6xx_gmu_unregister(void)
+{
+ platform_driver_unregister(&a6xx_gmu_drv);
+}
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index b8f8ae940b55..d418f49f47a1 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -2352,8 +2352,6 @@ static void a6xx_destroy(struct msm_gpu *gpu)
a6xx_llc_slices_destroy(a6xx_gpu);
- a6xx_gmu_remove(a6xx_gpu);
-
adreno_gpu_cleanup(adreno_gpu);
kfree(a6xx_gpu);
@@ -2689,10 +2687,6 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
if (adreno_is_a618(adreno_gpu) || adreno_is_7c3(adreno_gpu))
priv->gpu_clamp_to_idle = true;
- if (adreno_has_gmu_wrapper(adreno_gpu))
- ret = a6xx_gmu_wrapper_init(a6xx_gpu, node);
- else
- ret = a6xx_gmu_init(a6xx_gpu, node);
of_node_put(node);
if (ret) {
a6xx_destroy(&(a6xx_gpu->base.base));
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
index 0b17d36c36a9..070af413e5ad 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
@@ -252,9 +252,6 @@ bool a6xx_gmu_isidle(struct a6xx_gmu *gmu);
int a6xx_gmu_set_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state);
void a6xx_gmu_clear_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state);
-int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node);
-int a6xx_gmu_wrapper_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node);
-void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu);
void a6xx_gmu_sysprof_setup(struct msm_gpu *gpu);
void a6xx_preempt_init(struct msm_gpu *gpu);
diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c
index 28f744f3caf7..9f9b5863a8de 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_device.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_device.c
@@ -415,6 +415,8 @@ void __init adreno_register(void)
return;
platform_driver_register(&adreno_driver);
+
+ a6xx_gmu_register();
}
void __exit adreno_unregister(void)
@@ -422,5 +424,7 @@ void __exit adreno_unregister(void)
if (skip_gpu)
return;
+ a6xx_gmu_unregister();
+
platform_driver_unregister(&adreno_driver);
}
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
index 390fa6720d9b..d3a653adbc72 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
@@ -678,6 +678,10 @@ struct msm_gpu *a3xx_gpu_init(struct drm_device *dev);
struct msm_gpu *a4xx_gpu_init(struct drm_device *dev);
struct msm_gpu *a5xx_gpu_init(struct drm_device *dev);
struct msm_gpu *a6xx_gpu_init(struct drm_device *dev);
+struct msm_gpu *a6xx_gpu_init(struct drm_device *dev);
+
+void __init a6xx_gmu_register(void);
+void __exit a6xx_gmu_unregister(void);
static inline uint32_t get_wptr(struct msm_ringbuffer *ring)
{
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index 7e977fec4100..0618da7e8b40 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -998,18 +998,30 @@ static const struct of_device_id msm_gpu_match[] = {
{ },
};
+static const struct of_device_id msm_gmu_match[] = {
+ { .compatible = "qcom,adreno-gmu" },
+ { .compatible = "qcom,adreno-gmu-wrapper" },
+ { },
+};
+
static int add_gpu_components(struct device *dev,
struct component_match **matchptr)
{
- struct device_node *np;
+ struct device_node *np, *gmu;
np = of_find_matching_node(NULL, msm_gpu_match);
if (!np)
return 0;
- if (of_device_is_available(np) && adreno_has_gpu(np))
+ if (of_device_is_available(np) && adreno_has_gpu(np)) {
drm_of_component_match_add(dev, matchptr, component_compare_of, np);
+ gmu = of_find_matching_node(NULL, msm_gmu_match);
+ if (of_device_is_available(gmu))
+ drm_of_component_match_add(dev, matchptr, component_compare_of, gmu);
+ of_node_put(gmu);
+ }
+
of_node_put(np);
return 0;
---
base-commit: 211ddde0823f1442e4ad052a2f30f050145ccada
change-id: 20251022-topic-adreno-attach-gmu-to-driver-e47025fd7ebb
Best regards,
--
Neil Armstrong <neil.armstrong@linaro.org>
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH RFC RFT] drm/msm: adreno: attach the GMU device to a driver
2025-10-22 12:44 [PATCH RFC RFT] drm/msm: adreno: attach the GMU device to a driver Neil Armstrong
@ 2025-10-22 17:09 ` Konrad Dybcio
2025-10-23 8:27 ` Neil Armstrong
2025-10-26 1:31 ` Jens Reidel
2025-10-30 21:29 ` Akhil P Oommen
2 siblings, 1 reply; 13+ messages in thread
From: Konrad Dybcio @ 2025-10-22 17:09 UTC (permalink / raw)
To: Neil Armstrong, Rob Clark, Sean Paul, Konrad Dybcio,
Dmitry Baryshkov, Abhinav Kumar, Jessica Zhang, Marijn Suijten,
David Airlie, Simona Vetter
Cc: linux-arm-msm, dri-devel, freedreno, linux-kernel
On 10/22/25 2:44 PM, Neil Armstrong wrote:
> Due to the sync_state is enabled by default in pmdomain & CCF since v6.17,
> the GCC and GPUCC sync_state would stay pending, leaving the resources in
> full performance:
> gcc-x1e80100 100000.clock-controller: sync_state() pending due to 3d6a000.gmu
> gpucc-x1e80100 3d90000.clock-controller: sync_state() pending due to 3d6a000.gmu
Does this *actually* cause any harm, by the way?
For example on x1e, GMU refers to 2 GPU_CC GDSCs, GPU_CC refers
to a pair of GCC clocks and GCC refers to VDD_CX
and I see these prints, yet:
/sys/kernel/debug/pm_genpd/gpu_cx_gdsc/current_state:off-0
/sys/kernel/debug/pm_genpd/gpu_gx_gdsc/current_state:off-0
/sys/kernel/debug/pm_genpd/cx/current_state:on
/sys/kernel/debug/pm_genpd/cx/perf_state:256 # because of USB3 votes
I'm not super sure where that sync_state comes from either (maybe
dev_set_drv_sync_state in pmdomain/core?)
Konrad
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH RFC RFT] drm/msm: adreno: attach the GMU device to a driver
2025-10-22 17:09 ` Konrad Dybcio
@ 2025-10-23 8:27 ` Neil Armstrong
2025-10-24 8:55 ` Konrad Dybcio
0 siblings, 1 reply; 13+ messages in thread
From: Neil Armstrong @ 2025-10-23 8:27 UTC (permalink / raw)
To: Konrad Dybcio, Rob Clark, Sean Paul, Konrad Dybcio,
Dmitry Baryshkov, Abhinav Kumar, Jessica Zhang, Marijn Suijten,
David Airlie, Simona Vetter
Cc: linux-arm-msm, dri-devel, freedreno, linux-kernel
On 10/22/25 19:09, Konrad Dybcio wrote:
> On 10/22/25 2:44 PM, Neil Armstrong wrote:
>> Due to the sync_state is enabled by default in pmdomain & CCF since v6.17,
>> the GCC and GPUCC sync_state would stay pending, leaving the resources in
>> full performance:
>> gcc-x1e80100 100000.clock-controller: sync_state() pending due to 3d6a000.gmu
>> gpucc-x1e80100 3d90000.clock-controller: sync_state() pending due to 3d6a000.gmu
>
> Does this *actually* cause any harm, by the way?
?
>
> For example on x1e, GMU refers to 2 GPU_CC GDSCs, GPU_CC refers
> to a pair of GCC clocks and GCC refers to VDD_CX
>
> and I see these prints, yet:
>
> /sys/kernel/debug/pm_genpd/gpu_cx_gdsc/current_state:off-0
> /sys/kernel/debug/pm_genpd/gpu_gx_gdsc/current_state:off-0
>
> /sys/kernel/debug/pm_genpd/cx/current_state:on
> /sys/kernel/debug/pm_genpd/cx/perf_state:256 # because of USB3 votes
>
> I'm not super sure where that sync_state comes from either (maybe
> dev_set_drv_sync_state in pmdomain/core?)
The way we handle the GMU so far is wrong, it abuses the driver model.
And this is a symptom, whatever the impact it has, it needs to be fixed
in a proper way.
The sync_state is retained because the gmu device is never probed but
has some clocks and power domains attached to it, which is clearly wrong.
Neil
>
> Konrad
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH RFC RFT] drm/msm: adreno: attach the GMU device to a driver
2025-10-23 8:27 ` Neil Armstrong
@ 2025-10-24 8:55 ` Konrad Dybcio
2025-10-24 9:11 ` Neil Armstrong
0 siblings, 1 reply; 13+ messages in thread
From: Konrad Dybcio @ 2025-10-24 8:55 UTC (permalink / raw)
To: Neil Armstrong, Rob Clark, Sean Paul, Konrad Dybcio,
Dmitry Baryshkov, Abhinav Kumar, Jessica Zhang, Marijn Suijten,
David Airlie, Simona Vetter
Cc: linux-arm-msm, dri-devel, freedreno, linux-kernel
On 10/23/25 10:27 AM, Neil Armstrong wrote:
> On 10/22/25 19:09, Konrad Dybcio wrote:
>> On 10/22/25 2:44 PM, Neil Armstrong wrote:
>>> Due to the sync_state is enabled by default in pmdomain & CCF since v6.17,
>>> the GCC and GPUCC sync_state would stay pending, leaving the resources in
>>> full performance:
>>> gcc-x1e80100 100000.clock-controller: sync_state() pending due to 3d6a000.gmu
>>> gpucc-x1e80100 3d90000.clock-controller: sync_state() pending due to 3d6a000.gmu
>>
>> Does this *actually* cause any harm, by the way?
>
> ?
>
>>
>> For example on x1e, GMU refers to 2 GPU_CC GDSCs, GPU_CC refers
>> to a pair of GCC clocks and GCC refers to VDD_CX
>>
>> and I see these prints, yet:
>>
>> /sys/kernel/debug/pm_genpd/gpu_cx_gdsc/current_state:off-0
>> /sys/kernel/debug/pm_genpd/gpu_gx_gdsc/current_state:off-0
>>
>> /sys/kernel/debug/pm_genpd/cx/current_state:on
>> /sys/kernel/debug/pm_genpd/cx/perf_state:256 # because of USB3 votes
>>
>> I'm not super sure where that sync_state comes from either (maybe
>> dev_set_drv_sync_state in pmdomain/core?)
>
> The way we handle the GMU so far is wrong, it abuses the driver model.
>
> And this is a symptom, whatever the impact it has, it needs to be fixed
> in a proper way.
>
> The sync_state is retained because the gmu device is never probed but
> has some clocks and power domains attached to it, which is clearly wrong.
Yes I agree, however I'm only debating the commit message claims of
'leaving the resources in full performance', which doesn't seem to be
true
Konrad
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH RFC RFT] drm/msm: adreno: attach the GMU device to a driver
2025-10-24 8:55 ` Konrad Dybcio
@ 2025-10-24 9:11 ` Neil Armstrong
2025-10-30 9:52 ` Konrad Dybcio
0 siblings, 1 reply; 13+ messages in thread
From: Neil Armstrong @ 2025-10-24 9:11 UTC (permalink / raw)
To: Konrad Dybcio, Rob Clark, Sean Paul, Konrad Dybcio,
Dmitry Baryshkov, Abhinav Kumar, Jessica Zhang, Marijn Suijten,
David Airlie, Simona Vetter
Cc: linux-arm-msm, dri-devel, freedreno, linux-kernel
On 10/24/25 10:55, Konrad Dybcio wrote:
> On 10/23/25 10:27 AM, Neil Armstrong wrote:
>> On 10/22/25 19:09, Konrad Dybcio wrote:
>>> On 10/22/25 2:44 PM, Neil Armstrong wrote:
>>>> Due to the sync_state is enabled by default in pmdomain & CCF since v6.17,
>>>> the GCC and GPUCC sync_state would stay pending, leaving the resources in
>>>> full performance:
>>>> gcc-x1e80100 100000.clock-controller: sync_state() pending due to 3d6a000.gmu
>>>> gpucc-x1e80100 3d90000.clock-controller: sync_state() pending due to 3d6a000.gmu
>>>
>>> Does this *actually* cause any harm, by the way?
>>
>> ?
>>
>>>
>>> For example on x1e, GMU refers to 2 GPU_CC GDSCs, GPU_CC refers
>>> to a pair of GCC clocks and GCC refers to VDD_CX
>>>
>>> and I see these prints, yet:
>>>
>>> /sys/kernel/debug/pm_genpd/gpu_cx_gdsc/current_state:off-0
>>> /sys/kernel/debug/pm_genpd/gpu_gx_gdsc/current_state:off-0
>>>
>>> /sys/kernel/debug/pm_genpd/cx/current_state:on
>>> /sys/kernel/debug/pm_genpd/cx/perf_state:256 # because of USB3 votes
>>>
>>> I'm not super sure where that sync_state comes from either (maybe
>>> dev_set_drv_sync_state in pmdomain/core?)
>>
>> The way we handle the GMU so far is wrong, it abuses the driver model.
>>
>> And this is a symptom, whatever the impact it has, it needs to be fixed
>> in a proper way.
>>
>> The sync_state is retained because the gmu device is never probed but
>> has some clocks and power domains attached to it, which is clearly wrong.
>
> Yes I agree, however I'm only debating the commit message claims of
> 'leaving the resources in full performance', which doesn't seem to be
> true
OK so the wording may be a little bit extreme, perhaps something like:
the GCC and GPUCC sync_state would stay pending, leaving the unused
power domains enabled for the lifetime of the system.
Neil
>
> Konrad
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH RFC RFT] drm/msm: adreno: attach the GMU device to a driver
2025-10-22 12:44 [PATCH RFC RFT] drm/msm: adreno: attach the GMU device to a driver Neil Armstrong
2025-10-22 17:09 ` Konrad Dybcio
@ 2025-10-26 1:31 ` Jens Reidel
2025-10-29 10:25 ` Neil Armstrong
2025-10-30 21:29 ` Akhil P Oommen
2 siblings, 1 reply; 13+ messages in thread
From: Jens Reidel @ 2025-10-26 1:31 UTC (permalink / raw)
To: Neil Armstrong, Rob Clark, Sean Paul, Konrad Dybcio,
Dmitry Baryshkov, Abhinav Kumar, Jessica Zhang, Marijn Suijten,
David Airlie, Simona Vetter
Cc: linux-arm-msm, dri-devel, freedreno, linux-kernel
On 10/22/25 14:44, Neil Armstrong wrote:
> Due to the sync_state is enabled by default in pmdomain & CCF since v6.17,
> the GCC and GPUCC sync_state would stay pending, leaving the resources in
> full performance:
> gcc-x1e80100 100000.clock-controller: sync_state() pending due to 3d6a000.gmu
> gpucc-x1e80100 3d90000.clock-controller: sync_state() pending due to 3d6a000.gmu
>
> In order to fix this state and allow the GMU to be properly
> probed, let's add a proper driver for the GMU and add it to
> the MSM driver components.
>
> Only the proper GMU has been tested since I don't have
> access to hardware with a GMU wrapper.
>
> Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
> ---
> drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 354 ++++++++++++++---------------
> drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 6 -
> drivers/gpu/drm/msm/adreno/a6xx_gpu.h | 3 -
> drivers/gpu/drm/msm/adreno/adreno_device.c | 4 +
> drivers/gpu/drm/msm/adreno/adreno_gpu.h | 4 +
> drivers/gpu/drm/msm/msm_drv.c | 16 +-
> 6 files changed, 192 insertions(+), 195 deletions(-)
>
> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
> index fc62fef2fed8..6e7c3e627509 100644
> --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
> @@ -1859,11 +1859,14 @@ void a6xx_gmu_sysprof_setup(struct msm_gpu *gpu)
> pm_runtime_put(&gpu->pdev->dev);
> }
>
> -void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu)
> +static void a6xx_gmu_unbind(struct device *dev, struct device *master, void *data)
> {
> - struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
> + struct platform_device *pdev = to_platform_device(dev);
> + struct msm_drm_private *priv = dev_get_drvdata(master);
> + struct msm_gpu *gpu = priv->gpu;
> + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
> + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
> struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
> - struct platform_device *pdev = to_platform_device(gmu->dev);
>
> mutex_lock(&gmu->lock);
> if (!gmu->initialized) {
> @@ -1903,9 +1906,6 @@ void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu)
> free_irq(gmu->gmu_irq, gmu);
> free_irq(gmu->hfi_irq, gmu);
> }
> -
> - /* Drop reference taken in of_find_device_by_node */
> - put_device(gmu->dev);
> }
>
> static int cxpd_notifier_cb(struct notifier_block *nb,
> @@ -1919,169 +1919,130 @@ static int cxpd_notifier_cb(struct notifier_block *nb,
> return 0;
> }
>
> -int a6xx_gmu_wrapper_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node)
> +static int a6xx_gmu_bind(struct device *dev, struct device *master, void *data)
> {
> - struct platform_device *pdev = of_find_device_by_node(node);
> - struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
> - int ret;
> -
> - if (!pdev)
> - return -ENODEV;
> -
> - gmu->dev = &pdev->dev;
> -
> - ret = of_dma_configure(gmu->dev, node, true);
> - if (ret)
> - return ret;
> -
> - pm_runtime_enable(gmu->dev);
> -
> - /* Mark legacy for manual SPTPRAC control */
> - gmu->legacy = true;
> -
> - /* Map the GMU registers */
> - gmu->mmio = a6xx_gmu_get_mmio(pdev, "gmu");
> - if (IS_ERR(gmu->mmio)) {
> - ret = PTR_ERR(gmu->mmio);
> - goto err_mmio;
> - }
> -
> - gmu->cxpd = dev_pm_domain_attach_by_name(gmu->dev, "cx");
> - if (IS_ERR(gmu->cxpd)) {
> - ret = PTR_ERR(gmu->cxpd);
> - goto err_mmio;
> - }
> -
> - if (!device_link_add(gmu->dev, gmu->cxpd, DL_FLAG_PM_RUNTIME)) {
> - ret = -ENODEV;
> - goto detach_cxpd;
> - }
> -
> - init_completion(&gmu->pd_gate);
> - complete_all(&gmu->pd_gate);
> - gmu->pd_nb.notifier_call = cxpd_notifier_cb;
> -
> - /* Get a link to the GX power domain to reset the GPU */
> - gmu->gxpd = dev_pm_domain_attach_by_name(gmu->dev, "gx");
> - if (IS_ERR(gmu->gxpd)) {
> - ret = PTR_ERR(gmu->gxpd);
> - goto err_mmio;
> - }
> -
> - gmu->initialized = true;
> -
> - return 0;
> -
> -detach_cxpd:
> - dev_pm_domain_detach(gmu->cxpd, false);
> -
> -err_mmio:
> - iounmap(gmu->mmio);
> -
> - /* Drop reference taken in of_find_device_by_node */
> - put_device(gmu->dev);
> -
> - return ret;
> -}
> -
> -int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node)
> -{
> - struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
> + struct platform_device *pdev = to_platform_device(dev);
> + struct msm_drm_private *priv = dev_get_drvdata(master);
> + struct msm_gpu *gpu = dev_to_gpu(&priv->gpu_pdev->dev);
> + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
> + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
> struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
> - struct platform_device *pdev = of_find_device_by_node(node);
> struct device_link *link;
> int ret;
>
> - if (!pdev)
> - return -ENODEV;
> -
> - gmu->dev = &pdev->dev;
> + gmu->dev = dev;
>
> - ret = of_dma_configure(gmu->dev, node, true);
> + ret = of_dma_configure(gmu->dev, dev->of_node, true);
> if (ret)
> return ret;
>
> - /* Set GMU idle level */
> - gmu->idle_level = (adreno_gpu->info->quirks & ADRENO_QUIRK_IFPC) ?
> - GMU_IDLE_STATE_IFPC : GMU_IDLE_STATE_ACTIVE;
> + if (adreno_has_gmu_wrapper(adreno_gpu))
> + /* Mark legacy for manual SPTPRAC control */
> + gmu->legacy = true;
> +
> + if (!gmu->legacy)
> + /* Set GMU idle level */
> + gmu->idle_level = (adreno_gpu->info->quirks & ADRENO_QUIRK_IFPC) ?
> + GMU_IDLE_STATE_IFPC : GMU_IDLE_STATE_ACTIVE;
>
> pm_runtime_enable(gmu->dev);
>
> - /* Get the list of clocks */
> - ret = a6xx_gmu_clocks_probe(gmu);
> - if (ret)
> - goto err_put_device;
> + if (!gmu->legacy) {
> + /* Get the list of clocks */
> + ret = a6xx_gmu_clocks_probe(gmu);
> + if (ret)
> + return ret;
>
> - ret = a6xx_gmu_memory_probe(adreno_gpu->base.dev, gmu);
> - if (ret)
> - goto err_put_device;
> + ret = a6xx_gmu_memory_probe(adreno_gpu->base.dev, gmu);
> + if (ret)
> + return ret;
>
> + /* A660 now requires handling "prealloc requests" in GMU firmware
> + * For now just hardcode allocations based on the known firmware.
> + * note: there is no indication that these correspond to "dummy" or
> + * "debug" regions, but this "guess" allows reusing these BOs which
> + * are otherwise unused by a660.
> + */
> + gmu->dummy.size = SZ_4K;
> + if (adreno_is_a660_family(adreno_gpu) ||
> + adreno_is_a7xx(adreno_gpu)) {
> + ret = a6xx_gmu_memory_alloc(gmu, &gmu->debug, SZ_4K * 7,
> + 0x60400000, "debug");
> + if (ret)
> + goto err_memory;
> +
> + gmu->dummy.size = SZ_8K;
> + }
>
> - /* A660 now requires handling "prealloc requests" in GMU firmware
> - * For now just hardcode allocations based on the known firmware.
> - * note: there is no indication that these correspond to "dummy" or
> - * "debug" regions, but this "guess" allows reusing these BOs which
> - * are otherwise unused by a660.
> - */
> - gmu->dummy.size = SZ_4K;
> - if (adreno_is_a660_family(adreno_gpu) ||
> - adreno_is_a7xx(adreno_gpu)) {
> - ret = a6xx_gmu_memory_alloc(gmu, &gmu->debug, SZ_4K * 7,
> - 0x60400000, "debug");
> + /* Allocate memory for the GMU dummy page */
> + ret = a6xx_gmu_memory_alloc(gmu, &gmu->dummy, gmu->dummy.size,
> + 0x60000000, "dummy");
> if (ret)
> goto err_memory;
>
> - gmu->dummy.size = SZ_8K;
> - }
> -
> - /* Allocate memory for the GMU dummy page */
> - ret = a6xx_gmu_memory_alloc(gmu, &gmu->dummy, gmu->dummy.size,
> - 0x60000000, "dummy");
> - if (ret)
> - goto err_memory;
> + /* Note that a650 family also includes a660 family: */
> + if (adreno_is_a650_family(adreno_gpu) ||
> + adreno_is_a7xx(adreno_gpu)) {
> + ret = a6xx_gmu_memory_alloc(gmu, &gmu->icache,
> + SZ_16M - SZ_16K, 0x04000, "icache");
> + if (ret)
> + goto err_memory;
> + /*
> + * NOTE: when porting legacy ("pre-650-family") GPUs you may be tempted to add a condition
> + * to allocate icache/dcache here, as per downstream code flow, but it may not actually be
> + * necessary. If you omit this step and you don't get random pagefaults, you are likely
> + * good to go without this!
> + */
> + } else if (adreno_is_a640_family(adreno_gpu)) {
> + ret = a6xx_gmu_memory_alloc(gmu, &gmu->icache,
> + SZ_256K - SZ_16K, 0x04000, "icache");
> + if (ret)
> + goto err_memory;
> +
> + ret = a6xx_gmu_memory_alloc(gmu, &gmu->dcache,
> + SZ_256K - SZ_16K, 0x44000, "dcache");
> + if (ret)
> + goto err_memory;
> + } else if (adreno_is_a630_family(adreno_gpu)) {
> + /* HFI v1, has sptprac */
> + gmu->legacy = true;
> +
> + /* Allocate memory for the GMU debug region */
> + ret = a6xx_gmu_memory_alloc(gmu, &gmu->debug, SZ_16K, 0, "debug");
> + if (ret)
> + goto err_memory;
> + }
>
> - /* Note that a650 family also includes a660 family: */
> - if (adreno_is_a650_family(adreno_gpu) ||
> - adreno_is_a7xx(adreno_gpu)) {
> - ret = a6xx_gmu_memory_alloc(gmu, &gmu->icache,
> - SZ_16M - SZ_16K, 0x04000, "icache");
> - if (ret)
> - goto err_memory;
> - /*
> - * NOTE: when porting legacy ("pre-650-family") GPUs you may be tempted to add a condition
> - * to allocate icache/dcache here, as per downstream code flow, but it may not actually be
> - * necessary. If you omit this step and you don't get random pagefaults, you are likely
> - * good to go without this!
> - */
> - } else if (adreno_is_a640_family(adreno_gpu)) {
> - ret = a6xx_gmu_memory_alloc(gmu, &gmu->icache,
> - SZ_256K - SZ_16K, 0x04000, "icache");
> + /* Allocate memory for the GMU log region */
> + ret = a6xx_gmu_memory_alloc(gmu, &gmu->log, SZ_16K, 0, "log");
> if (ret)
> goto err_memory;
>
> - ret = a6xx_gmu_memory_alloc(gmu, &gmu->dcache,
> - SZ_256K - SZ_16K, 0x44000, "dcache");
> + /* Allocate memory for for the HFI queues */
> + ret = a6xx_gmu_memory_alloc(gmu, &gmu->hfi, SZ_16K, 0, "hfi");
> if (ret)
> goto err_memory;
> - } else if (adreno_is_a630_family(adreno_gpu)) {
> - /* HFI v1, has sptprac */
> - gmu->legacy = true;
>
> - /* Allocate memory for the GMU debug region */
> - ret = a6xx_gmu_memory_alloc(gmu, &gmu->debug, SZ_16K, 0, "debug");
> - if (ret)
> - goto err_memory;
> - }
> + if (adreno_is_a650_family(adreno_gpu) ||
> + adreno_is_a7xx(adreno_gpu)) {
> + gmu->rscc = a6xx_gmu_get_mmio(pdev, "rscc");
> + if (IS_ERR(gmu->rscc)) {
> + ret = -ENODEV;
> + goto err_mmio;
> + }
> + } else {
> + gmu->rscc = gmu->mmio + 0x23000;
> + }
>
> - /* Allocate memory for the GMU log region */
> - ret = a6xx_gmu_memory_alloc(gmu, &gmu->log, SZ_16K, 0, "log");
> - if (ret)
> - goto err_memory;
> + /* Get the HFI and GMU interrupts */
> + gmu->hfi_irq = a6xx_gmu_get_irq(gmu, pdev, "hfi", a6xx_hfi_irq);
> + gmu->gmu_irq = a6xx_gmu_get_irq(gmu, pdev, "gmu", a6xx_gmu_irq);
>
> - /* Allocate memory for for the HFI queues */
> - ret = a6xx_gmu_memory_alloc(gmu, &gmu->hfi, SZ_16K, 0, "hfi");
> - if (ret)
> - goto err_memory;
> + if (gmu->hfi_irq < 0 || gmu->gmu_irq < 0) {
> + ret = -ENODEV;
> + goto err_mmio;
> + }
> + }
>
> /* Map the GMU registers */
> gmu->mmio = a6xx_gmu_get_mmio(pdev, "gmu");
> @@ -2090,26 +2051,6 @@ int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node)
> goto err_memory;
> }
>
> - if (adreno_is_a650_family(adreno_gpu) ||
> - adreno_is_a7xx(adreno_gpu)) {
> - gmu->rscc = a6xx_gmu_get_mmio(pdev, "rscc");
> - if (IS_ERR(gmu->rscc)) {
> - ret = -ENODEV;
> - goto err_mmio;
> - }
> - } else {
> - gmu->rscc = gmu->mmio + 0x23000;
> - }
> -
> - /* Get the HFI and GMU interrupts */
> - gmu->hfi_irq = a6xx_gmu_get_irq(gmu, pdev, "hfi", a6xx_hfi_irq);
> - gmu->gmu_irq = a6xx_gmu_get_irq(gmu, pdev, "gmu", a6xx_gmu_irq);
> -
> - if (gmu->hfi_irq < 0 || gmu->gmu_irq < 0) {
> - ret = -ENODEV;
> - goto err_mmio;
> - }
> -
> gmu->cxpd = dev_pm_domain_attach_by_name(gmu->dev, "cx");
> if (IS_ERR(gmu->cxpd)) {
> ret = PTR_ERR(gmu->cxpd);
> @@ -2122,11 +2063,13 @@ int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node)
> goto detach_cxpd;
> }
>
> - /* Other errors are handled during GPU ACD probe */
> - gmu->qmp = qmp_get(gmu->dev);
> - if (PTR_ERR_OR_ZERO(gmu->qmp) == -EPROBE_DEFER) {
> - ret = -EPROBE_DEFER;
> - goto detach_gxpd;
> + if (!gmu->legacy) {
> + /* Other errors are handled during GPU ACD probe */
> + gmu->qmp = qmp_get(gmu->dev);
> + if (PTR_ERR_OR_ZERO(gmu->qmp) == -EPROBE_DEFER) {
> + ret = -EPROBE_DEFER;
> + goto detach_gxpd;
> + }
> }
>
> init_completion(&gmu->pd_gate);
> @@ -2139,18 +2082,20 @@ int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node)
> */
> gmu->gxpd = dev_pm_domain_attach_by_name(gmu->dev, "gx");
>
> - /* Get the power levels for the GMU and GPU */
> - a6xx_gmu_pwrlevels_probe(gmu);
> + if (!gmu->legacy) {
> + /* Get the power levels for the GMU and GPU */
> + a6xx_gmu_pwrlevels_probe(gmu);
>
> - ret = a6xx_gmu_acd_probe(gmu);
> - if (ret)
> - goto detach_gxpd;
> + ret = a6xx_gmu_acd_probe(gmu);
> + if (ret)
> + goto detach_gxpd;
>
> - /* Set up the HFI queues */
> - a6xx_hfi_init(gmu);
> + /* Set up the HFI queues */
> + a6xx_hfi_init(gmu);
>
> - /* Initialize RPMh */
> - a6xx_gmu_rpmh_init(gmu);
> + /* Initialize RPMh */
> + a6xx_gmu_rpmh_init(gmu);
> + }
>
> gmu->initialized = true;
>
> @@ -2170,16 +2115,57 @@ int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node)
>
> err_mmio:
> iounmap(gmu->mmio);
> - if (platform_get_resource_byname(pdev, IORESOURCE_MEM, "rscc"))
> - iounmap(gmu->rscc);
> - free_irq(gmu->gmu_irq, gmu);
> - free_irq(gmu->hfi_irq, gmu);
> + if (!gmu->legacy) {
> + if (platform_get_resource_byname(pdev, IORESOURCE_MEM, "rscc"))
> + iounmap(gmu->rscc);
> + free_irq(gmu->gmu_irq, gmu);
> + free_irq(gmu->hfi_irq, gmu);
> + }
>
> err_memory:
> - a6xx_gmu_memory_free(gmu);
> -err_put_device:
> - /* Drop reference taken in of_find_device_by_node */
> - put_device(gmu->dev);
> + if (!gmu->legacy)
> + a6xx_gmu_memory_free(gmu);
>
> return ret;
> }
> +
> +static const struct component_ops a6xx_gmu_ops = {
> + .bind = a6xx_gmu_bind,
> + .unbind = a6xx_gmu_unbind,
> +};
> +
> +static int a6xx_gmu_probe(struct platform_device *pdev)
> +{
> + return component_add(&pdev->dev, &a6xx_gmu_ops);
> +}
> +
> +static void a6xx_gmu_remove(struct platform_device *pdev)
> +{
> +
> + component_del(&pdev->dev, &a6xx_gmu_ops);
> +}
> +
> +static const struct of_device_id dt_match[] = {
> + { .compatible = "qcom,adreno-gmu" },
> + { .compatible = "qcom,adreno-gmu-wrapper" },
> + {}
> +};
> +
> +static struct platform_driver a6xx_gmu_drv = {
> + .probe = a6xx_gmu_probe,
> + .remove = a6xx_gmu_remove,
> + .driver = {
> + .name = "a6xx_gmu",
> + .of_match_table = dt_match,
> + },
> +};
> +
> +void __init a6xx_gmu_register(void)
> +{
> + platform_driver_register(&a6xx_gmu_drv);
> +}
> +
> +void __exit a6xx_gmu_unregister(void)
> +{
> + platform_driver_unregister(&a6xx_gmu_drv);
> +}
> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> index b8f8ae940b55..d418f49f47a1 100644
> --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> @@ -2352,8 +2352,6 @@ static void a6xx_destroy(struct msm_gpu *gpu)
>
> a6xx_llc_slices_destroy(a6xx_gpu);
>
> - a6xx_gmu_remove(a6xx_gpu);
> -
> adreno_gpu_cleanup(adreno_gpu);
>
> kfree(a6xx_gpu);
> @@ -2689,10 +2687,6 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
> if (adreno_is_a618(adreno_gpu) || adreno_is_7c3(adreno_gpu))
> priv->gpu_clamp_to_idle = true;
>
> - if (adreno_has_gmu_wrapper(adreno_gpu))
> - ret = a6xx_gmu_wrapper_init(a6xx_gpu, node);
> - else
> - ret = a6xx_gmu_init(a6xx_gpu, node);
> of_node_put(node);
> if (ret) {
> a6xx_destroy(&(a6xx_gpu->base.base));
> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
> index 0b17d36c36a9..070af413e5ad 100644
> --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
> @@ -252,9 +252,6 @@ bool a6xx_gmu_isidle(struct a6xx_gmu *gmu);
> int a6xx_gmu_set_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state);
> void a6xx_gmu_clear_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state);
>
> -int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node);
> -int a6xx_gmu_wrapper_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node);
> -void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu);
> void a6xx_gmu_sysprof_setup(struct msm_gpu *gpu);
>
> void a6xx_preempt_init(struct msm_gpu *gpu);
> diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c
> index 28f744f3caf7..9f9b5863a8de 100644
> --- a/drivers/gpu/drm/msm/adreno/adreno_device.c
> +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c
> @@ -415,6 +415,8 @@ void __init adreno_register(void)
> return;
>
> platform_driver_register(&adreno_driver);
> +
> + a6xx_gmu_register();
> }
>
> void __exit adreno_unregister(void)
> @@ -422,5 +424,7 @@ void __exit adreno_unregister(void)
> if (skip_gpu)
> return;
>
> + a6xx_gmu_unregister();
> +
> platform_driver_unregister(&adreno_driver);
> }
> diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
> index 390fa6720d9b..d3a653adbc72 100644
> --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h
> +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
> @@ -678,6 +678,10 @@ struct msm_gpu *a3xx_gpu_init(struct drm_device *dev);
> struct msm_gpu *a4xx_gpu_init(struct drm_device *dev);
> struct msm_gpu *a5xx_gpu_init(struct drm_device *dev);
> struct msm_gpu *a6xx_gpu_init(struct drm_device *dev);
> +struct msm_gpu *a6xx_gpu_init(struct drm_device *dev);
> +
> +void __init a6xx_gmu_register(void);
> +void __exit a6xx_gmu_unregister(void);
>
> static inline uint32_t get_wptr(struct msm_ringbuffer *ring)
> {
> diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
> index 7e977fec4100..0618da7e8b40 100644
> --- a/drivers/gpu/drm/msm/msm_drv.c
> +++ b/drivers/gpu/drm/msm/msm_drv.c
> @@ -998,18 +998,30 @@ static const struct of_device_id msm_gpu_match[] = {
> { },
> };
>
> +static const struct of_device_id msm_gmu_match[] = {
> + { .compatible = "qcom,adreno-gmu" },
> + { .compatible = "qcom,adreno-gmu-wrapper" },
> + { },
> +};
> +
> static int add_gpu_components(struct device *dev,
> struct component_match **matchptr)
> {
> - struct device_node *np;
> + struct device_node *np, *gmu;
>
> np = of_find_matching_node(NULL, msm_gpu_match);
> if (!np)
> return 0;
>
> - if (of_device_is_available(np) && adreno_has_gpu(np))
> + if (of_device_is_available(np) && adreno_has_gpu(np)) {
> drm_of_component_match_add(dev, matchptr, component_compare_of, np);
>
> + gmu = of_find_matching_node(NULL, msm_gmu_match);
> + if (of_device_is_available(gmu))
> + drm_of_component_match_add(dev, matchptr, component_compare_of, gmu);
> + of_node_put(gmu);
> + }
> +
> of_node_put(np);
>
> return 0;
>
> ---
> base-commit: 211ddde0823f1442e4ad052a2f30f050145ccada
> change-id: 20251022-topic-adreno-attach-gmu-to-driver-e47025fd7ebb
>
> Best regards,
Hi Neil,
thanks for the patch. With it applied, my GPU fails to initialize.
Here's the related dmesg section:
[ 1.733062] [drm:dpu_kms_hw_init:1173] dpu hardware revision:0x50020000
[ 1.735229] [drm] Initialized msm 1.13.0 for
ae01000.display-controller on minor 0
[ 1.735403] msm_dpu ae01000.display-controller:
[drm:adreno_request_fw] loaded qcom/a630_sqe.fw from new location
[ 1.735513] msm_dpu ae01000.display-controller:
[drm:adreno_request_fw] loaded qcom/a630_gmu.bin from new location
[ 1.746710] a6xx_gmu 506a000.gmu: [drm:a6xx_gmu_set_oob] *ERROR*
Timeout waiting for GMU OOB set BOOT_SLUMBER: 0x800000
[ 1.746766] msm_dpu ae01000.display-controller: [drm:adreno_load_gpu]
*ERROR* Couldn't power up the GPU: -110
This could be because I have an Adreno 630-family GPU, which is marked
as legacy in a6xx_gmu_init / a6xx_gmu_bind. Previously, the rest of the
init code would just always run, while now, some parts are conditionally
disabled for legacy GPUs - that may be unintentional? However,
unconditionally enabling those parts seems to fail to initialize the GPU
followed by a reset shortly after, so there's probably more to this.
Please let me know if there's anything I can do to help debug this.
Best regards,
Jens
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH RFC RFT] drm/msm: adreno: attach the GMU device to a driver
2025-10-26 1:31 ` Jens Reidel
@ 2025-10-29 10:25 ` Neil Armstrong
2025-11-04 1:30 ` Jens Reidel
0 siblings, 1 reply; 13+ messages in thread
From: Neil Armstrong @ 2025-10-29 10:25 UTC (permalink / raw)
To: Jens Reidel, Rob Clark, Sean Paul, Konrad Dybcio,
Dmitry Baryshkov, Abhinav Kumar, Jessica Zhang, Marijn Suijten,
David Airlie, Simona Vetter
Cc: linux-arm-msm, dri-devel, freedreno, linux-kernel
Hi,
On 10/26/25 02:31, Jens Reidel wrote:
> On 10/22/25 14:44, Neil Armstrong wrote:
>> Due to the sync_state is enabled by default in pmdomain & CCF since v6.17,
>> the GCC and GPUCC sync_state would stay pending, leaving the resources in
>> full performance:
>> gcc-x1e80100 100000.clock-controller: sync_state() pending due to 3d6a000.gmu
>> gpucc-x1e80100 3d90000.clock-controller: sync_state() pending due to 3d6a000.gmu
>>
>> In order to fix this state and allow the GMU to be properly
>> probed, let's add a proper driver for the GMU and add it to
>> the MSM driver components.
>>
>> Only the proper GMU has been tested since I don't have
>> access to hardware with a GMU wrapper.
>>
>> Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
>> ---
>> drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 354 ++++++++++++++---------------
>> drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 6 -
>> drivers/gpu/drm/msm/adreno/a6xx_gpu.h | 3 -
>> drivers/gpu/drm/msm/adreno/adreno_device.c | 4 +
>> drivers/gpu/drm/msm/adreno/adreno_gpu.h | 4 +
>> drivers/gpu/drm/msm/msm_drv.c | 16 +-
>> 6 files changed, 192 insertions(+), 195 deletions(-)
>>
<snip>
>>
>> ---
>> base-commit: 211ddde0823f1442e4ad052a2f30f050145ccada
>> change-id: 20251022-topic-adreno-attach-gmu-to-driver-e47025fd7ebb
>>
>> Best regards,
>
> Hi Neil,
>
> thanks for the patch. With it applied, my GPU fails to initialize.
> Here's the related dmesg section:
>
> [ 1.733062] [drm:dpu_kms_hw_init:1173] dpu hardware revision:0x50020000
> [ 1.735229] [drm] Initialized msm 1.13.0 for ae01000.display-controller on minor 0
> [ 1.735403] msm_dpu ae01000.display-controller: [drm:adreno_request_fw] loaded qcom/a630_sqe.fw from new location
> [ 1.735513] msm_dpu ae01000.display-controller: [drm:adreno_request_fw] loaded qcom/a630_gmu.bin from new location
> [ 1.746710] a6xx_gmu 506a000.gmu: [drm:a6xx_gmu_set_oob] *ERROR* Timeout waiting for GMU OOB set BOOT_SLUMBER: 0x800000
> [ 1.746766] msm_dpu ae01000.display-controller: [drm:adreno_load_gpu] *ERROR* Couldn't power up the GPU: -110
>
> This could be because I have an Adreno 630-family GPU, which is marked as legacy in a6xx_gmu_init / a6xx_gmu_bind. Previously, the rest of the init code would just always run, while now, some parts are conditionally disabled for legacy GPUs - that may be unintentional? However, unconditionally enabling those parts seems to fail to initialize the GPU followed by a reset shortly after, so there's probably more to this.
>
> Please let me know if there's anything I can do to help debug this.
Thanks for the report — it's sdm845-based, right?
I may have mismatched the role of the legacy parameter...
Could you try this on top:
===========================><=====================================
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index 6e7c3e627509..403675ed18c7 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -1925,6 +1925,7 @@ static int a6xx_gmu_bind(struct device *dev, struct device *master, void *data)
struct msm_drm_private *priv = dev_get_drvdata(master);
struct msm_gpu *gpu = dev_to_gpu(&priv->gpu_pdev->dev);
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ bool is_wrapper = adreno_has_gmu_wrapper(adreno_gpu);
struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
struct device_link *link;
@@ -1936,18 +1937,18 @@ static int a6xx_gmu_bind(struct device *dev, struct device *master, void *data)
if (ret)
return ret;
- if (adreno_has_gmu_wrapper(adreno_gpu))
+ if (is_wrapper)
/* Mark legacy for manual SPTPRAC control */
gmu->legacy = true;
- if (!gmu->legacy)
+ if (!is_wrapper)
/* Set GMU idle level */
gmu->idle_level = (adreno_gpu->info->quirks & ADRENO_QUIRK_IFPC) ?
GMU_IDLE_STATE_IFPC : GMU_IDLE_STATE_ACTIVE;
pm_runtime_enable(gmu->dev);
- if (!gmu->legacy) {
+ if (!is_wrapper) {
/* Get the list of clocks */
ret = a6xx_gmu_clocks_probe(gmu);
if (ret)
@@ -2063,7 +2064,7 @@ static int a6xx_gmu_bind(struct device *dev, struct device *master, void *data)
goto detach_cxpd;
}
- if (!gmu->legacy) {
+ if (!is_wrapper) {
/* Other errors are handled during GPU ACD probe */
gmu->qmp = qmp_get(gmu->dev);
if (PTR_ERR_OR_ZERO(gmu->qmp) == -EPROBE_DEFER) {
@@ -2082,7 +2083,7 @@ static int a6xx_gmu_bind(struct device *dev, struct device *master, void *data)
*/
gmu->gxpd = dev_pm_domain_attach_by_name(gmu->dev, "gx");
- if (!gmu->legacy) {
+ if (!is_wrapper) {
/* Get the power levels for the GMU and GPU */
a6xx_gmu_pwrlevels_probe(gmu);
@@ -2115,7 +2116,7 @@ static int a6xx_gmu_bind(struct device *dev, struct device *master, void *data)
err_mmio:
iounmap(gmu->mmio);
- if (!gmu->legacy) {
+ if (!is_wrapper) {
if (platform_get_resource_byname(pdev, IORESOURCE_MEM, "rscc"))
iounmap(gmu->rscc);
free_irq(gmu->gmu_irq, gmu);
@@ -2123,7 +2124,7 @@ static int a6xx_gmu_bind(struct device *dev, struct device *master, void *data)
}
err_memory:
- if (!gmu->legacy)
+ if (!is_wrapper)
a6xx_gmu_memory_free(gmu);
return ret;
===========================><=====================================
Thanks,
Neil
>
> Best regards,
> Jens
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH RFC RFT] drm/msm: adreno: attach the GMU device to a driver
2025-10-24 9:11 ` Neil Armstrong
@ 2025-10-30 9:52 ` Konrad Dybcio
2025-10-30 10:02 ` Neil Armstrong
0 siblings, 1 reply; 13+ messages in thread
From: Konrad Dybcio @ 2025-10-30 9:52 UTC (permalink / raw)
To: Neil Armstrong, Rob Clark, Sean Paul, Konrad Dybcio,
Dmitry Baryshkov, Abhinav Kumar, Jessica Zhang, Marijn Suijten,
David Airlie, Simona Vetter
Cc: linux-arm-msm, dri-devel, freedreno, linux-kernel
On 10/24/25 11:11 AM, Neil Armstrong wrote:
> On 10/24/25 10:55, Konrad Dybcio wrote:
>> On 10/23/25 10:27 AM, Neil Armstrong wrote:
>>> On 10/22/25 19:09, Konrad Dybcio wrote:
>>>> On 10/22/25 2:44 PM, Neil Armstrong wrote:
>>>>> Due to the sync_state is enabled by default in pmdomain & CCF since v6.17,
>>>>> the GCC and GPUCC sync_state would stay pending, leaving the resources in
>>>>> full performance:
>>>>> gcc-x1e80100 100000.clock-controller: sync_state() pending due to 3d6a000.gmu
>>>>> gpucc-x1e80100 3d90000.clock-controller: sync_state() pending due to 3d6a000.gmu
>>>>
>>>> Does this *actually* cause any harm, by the way?
>>>
>>> ?
>>>
>>>>
>>>> For example on x1e, GMU refers to 2 GPU_CC GDSCs, GPU_CC refers
>>>> to a pair of GCC clocks and GCC refers to VDD_CX
>>>>
>>>> and I see these prints, yet:
>>>>
>>>> /sys/kernel/debug/pm_genpd/gpu_cx_gdsc/current_state:off-0
>>>> /sys/kernel/debug/pm_genpd/gpu_gx_gdsc/current_state:off-0
>>>>
>>>> /sys/kernel/debug/pm_genpd/cx/current_state:on
>>>> /sys/kernel/debug/pm_genpd/cx/perf_state:256 # because of USB3 votes
>>>>
>>>> I'm not super sure where that sync_state comes from either (maybe
>>>> dev_set_drv_sync_state in pmdomain/core?)
>>>
>>> The way we handle the GMU so far is wrong, it abuses the driver model.
>>>
>>> And this is a symptom, whatever the impact it has, it needs to be fixed
>>> in a proper way.
>>>
>>> The sync_state is retained because the gmu device is never probed but
>>> has some clocks and power domains attached to it, which is clearly wrong.
>>
>> Yes I agree, however I'm only debating the commit message claims of
>> 'leaving the resources in full performance', which doesn't seem to be
>> true
>
> OK so the wording may be a little bit extreme, perhaps something like:
> the GCC and GPUCC sync_state would stay pending, leaving the unused
> power domains enabled for the lifetime of the system.
The debugfs reads above suggest this is actually not happening
Konrad
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH RFC RFT] drm/msm: adreno: attach the GMU device to a driver
2025-10-30 9:52 ` Konrad Dybcio
@ 2025-10-30 10:02 ` Neil Armstrong
0 siblings, 0 replies; 13+ messages in thread
From: Neil Armstrong @ 2025-10-30 10:02 UTC (permalink / raw)
To: Konrad Dybcio, Rob Clark, Sean Paul, Konrad Dybcio,
Dmitry Baryshkov, Abhinav Kumar, Jessica Zhang, Marijn Suijten,
David Airlie, Simona Vetter
Cc: linux-arm-msm, dri-devel, freedreno, linux-kernel
On 10/30/25 10:52, Konrad Dybcio wrote:
> On 10/24/25 11:11 AM, Neil Armstrong wrote:
>> On 10/24/25 10:55, Konrad Dybcio wrote:
>>> On 10/23/25 10:27 AM, Neil Armstrong wrote:
>>>> On 10/22/25 19:09, Konrad Dybcio wrote:
>>>>> On 10/22/25 2:44 PM, Neil Armstrong wrote:
>>>>>> Due to the sync_state is enabled by default in pmdomain & CCF since v6.17,
>>>>>> the GCC and GPUCC sync_state would stay pending, leaving the resources in
>>>>>> full performance:
>>>>>> gcc-x1e80100 100000.clock-controller: sync_state() pending due to 3d6a000.gmu
>>>>>> gpucc-x1e80100 3d90000.clock-controller: sync_state() pending due to 3d6a000.gmu
>>>>>
>>>>> Does this *actually* cause any harm, by the way?
>>>>
>>>> ?
>>>>
>>>>>
>>>>> For example on x1e, GMU refers to 2 GPU_CC GDSCs, GPU_CC refers
>>>>> to a pair of GCC clocks and GCC refers to VDD_CX
>>>>>
>>>>> and I see these prints, yet:
>>>>>
>>>>> /sys/kernel/debug/pm_genpd/gpu_cx_gdsc/current_state:off-0
>>>>> /sys/kernel/debug/pm_genpd/gpu_gx_gdsc/current_state:off-0
>>>>>
>>>>> /sys/kernel/debug/pm_genpd/cx/current_state:on
>>>>> /sys/kernel/debug/pm_genpd/cx/perf_state:256 # because of USB3 votes
>>>>>
>>>>> I'm not super sure where that sync_state comes from either (maybe
>>>>> dev_set_drv_sync_state in pmdomain/core?)
>>>>
>>>> The way we handle the GMU so far is wrong, it abuses the driver model.
>>>>
>>>> And this is a symptom, whatever the impact it has, it needs to be fixed
>>>> in a proper way.
>>>>
>>>> The sync_state is retained because the gmu device is never probed but
>>>> has some clocks and power domains attached to it, which is clearly wrong.
>>>
>>> Yes I agree, however I'm only debating the commit message claims of
>>> 'leaving the resources in full performance', which doesn't seem to be
>>> true
>>
>> OK so the wording may be a little bit extreme, perhaps something like:
>> the GCC and GPUCC sync_state would stay pending, leaving the unused
>> power domains enabled for the lifetime of the system.
>
> The debugfs reads above suggest this is actually not happening
Oh yeah so let's do nothing, thanks for your very useful nitpick review.
Is there anyone going to review the actual change instead of the commit message wording ?
Neil
>
> Konrad
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH RFC RFT] drm/msm: adreno: attach the GMU device to a driver
2025-10-22 12:44 [PATCH RFC RFT] drm/msm: adreno: attach the GMU device to a driver Neil Armstrong
2025-10-22 17:09 ` Konrad Dybcio
2025-10-26 1:31 ` Jens Reidel
@ 2025-10-30 21:29 ` Akhil P Oommen
2025-10-31 8:38 ` Neil Armstrong
2 siblings, 1 reply; 13+ messages in thread
From: Akhil P Oommen @ 2025-10-30 21:29 UTC (permalink / raw)
To: Neil Armstrong
Cc: linux-arm-msm, dri-devel, freedreno, linux-kernel, Rob Clark,
Sean Paul, Konrad Dybcio, Dmitry Baryshkov, Abhinav Kumar,
Jessica Zhang, Marijn Suijten, David Airlie, Simona Vetter
On 10/22/2025 6:14 PM, Neil Armstrong wrote:
> Due to the sync_state is enabled by default in pmdomain & CCF since v6.17,
> the GCC and GPUCC sync_state would stay pending, leaving the resources in
> full performance:
> gcc-x1e80100 100000.clock-controller: sync_state() pending due to 3d6a000.gmu
> gpucc-x1e80100 3d90000.clock-controller: sync_state() pending due to 3d6a000.gmu
>
> In order to fix this state and allow the GMU to be properly
> probed, let's add a proper driver for the GMU and add it to
> the MSM driver components.
>
> Only the proper GMU has been tested since I don't have
> access to hardware with a GMU wrapper.
>
> Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
> ---
> drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 354 ++++++++++++++---------------
> drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 6 -
> drivers/gpu/drm/msm/adreno/a6xx_gpu.h | 3 -
> drivers/gpu/drm/msm/adreno/adreno_device.c | 4 +
> drivers/gpu/drm/msm/adreno/adreno_gpu.h | 4 +
> drivers/gpu/drm/msm/msm_drv.c | 16 +-
> 6 files changed, 192 insertions(+), 195 deletions(-)
>
> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
> index fc62fef2fed8..6e7c3e627509 100644
> --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
> @@ -1859,11 +1859,14 @@ void a6xx_gmu_sysprof_setup(struct msm_gpu *gpu)
> pm_runtime_put(&gpu->pdev->dev);
> }
>
> -void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu)
> +static void a6xx_gmu_unbind(struct device *dev, struct device *master, void *data)
> {
I feel we should keep gmu and gmu_wrapper implementations separate. It
is already overloaded. How about adding a separate gmu_wrapper_bind_ops
and keeping it in the match data?
> - struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
> + struct platform_device *pdev = to_platform_device(dev);
> + struct msm_drm_private *priv = dev_get_drvdata(master);
> + struct msm_gpu *gpu = priv->gpu;
<< snip >>
> static inline uint32_t get_wptr(struct msm_ringbuffer *ring)
> {
> diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
> index 7e977fec4100..0618da7e8b40 100644
> --- a/drivers/gpu/drm/msm/msm_drv.c
> +++ b/drivers/gpu/drm/msm/msm_drv.c
> @@ -998,18 +998,30 @@ static const struct of_device_id msm_gpu_match[] = {
> { },
> };
>
> +static const struct of_device_id msm_gmu_match[] = {
> + { .compatible = "qcom,adreno-gmu" },
> + { .compatible = "qcom,adreno-gmu-wrapper" },
> + { },
> +};
> +
> static int add_gpu_components(struct device *dev,
> struct component_match **matchptr)
> {
> - struct device_node *np;
> + struct device_node *np, *gmu;
>
> np = of_find_matching_node(NULL, msm_gpu_match);
> if (!np)
> return 0;
>
> - if (of_device_is_available(np) && adreno_has_gpu(np))
> + if (of_device_is_available(np) && adreno_has_gpu(np)) {
> drm_of_component_match_add(dev, matchptr, component_compare_of, np);
>
> + gmu = of_find_matching_node(NULL, msm_gmu_match);
Instead of this, we can probably use the GMU phandle from the "qcom,gmu"
property? That is quicker and also doesn't assume that there is only a
single GPU.
> + if (of_device_is_available(gmu))
> + drm_of_component_match_add(dev, matchptr, component_compare_of, gmu);
> + of_node_put(gmu);
I think you missed the recently added headless support. Please check the
separate_gpu_kms modparam and msm_gpu_probe().
-Akhil
> + }
> +
> of_node_put(np);
>
> return 0;
>
> ---
> base-commit: 211ddde0823f1442e4ad052a2f30f050145ccada
> change-id: 20251022-topic-adreno-attach-gmu-to-driver-e47025fd7ebb
>
> Best regards,
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH RFC RFT] drm/msm: adreno: attach the GMU device to a driver
2025-10-30 21:29 ` Akhil P Oommen
@ 2025-10-31 8:38 ` Neil Armstrong
0 siblings, 0 replies; 13+ messages in thread
From: Neil Armstrong @ 2025-10-31 8:38 UTC (permalink / raw)
To: Akhil P Oommen
Cc: linux-arm-msm, dri-devel, freedreno, linux-kernel, Rob Clark,
Sean Paul, Konrad Dybcio, Dmitry Baryshkov, Abhinav Kumar,
Jessica Zhang, Marijn Suijten, David Airlie, Simona Vetter
Hi,
On 10/30/25 22:29, Akhil P Oommen wrote:
> On 10/22/2025 6:14 PM, Neil Armstrong wrote:
>> Due to the sync_state is enabled by default in pmdomain & CCF since v6.17,
>> the GCC and GPUCC sync_state would stay pending, leaving the resources in
>> full performance:
>> gcc-x1e80100 100000.clock-controller: sync_state() pending due to 3d6a000.gmu
>> gpucc-x1e80100 3d90000.clock-controller: sync_state() pending due to 3d6a000.gmu
>>
>> In order to fix this state and allow the GMU to be properly
>> probed, let's add a proper driver for the GMU and add it to
>> the MSM driver components.
>>
>> Only the proper GMU has been tested since I don't have
>> access to hardware with a GMU wrapper.
>>
>> Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
>> ---
>> drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 354 ++++++++++++++---------------
>> drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 6 -
>> drivers/gpu/drm/msm/adreno/a6xx_gpu.h | 3 -
>> drivers/gpu/drm/msm/adreno/adreno_device.c | 4 +
>> drivers/gpu/drm/msm/adreno/adreno_gpu.h | 4 +
>> drivers/gpu/drm/msm/msm_drv.c | 16 +-
>> 6 files changed, 192 insertions(+), 195 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
>> index fc62fef2fed8..6e7c3e627509 100644
>> --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
>> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
>> @@ -1859,11 +1859,14 @@ void a6xx_gmu_sysprof_setup(struct msm_gpu *gpu)
>> pm_runtime_put(&gpu->pdev->dev);
>> }
>>
>> -void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu)
>> +static void a6xx_gmu_unbind(struct device *dev, struct device *master, void *data)
>> {
>
> I feel we should keep gmu and gmu_wrapper implementations separate. It
> is already overloaded. How about adding a separate gmu_wrapper_bind_ops
> and keep it in the match data?
Good idea, will try something like that.
>
>> - struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
>> + struct platform_device *pdev = to_platform_device(dev);
>> + struct msm_drm_private *priv = dev_get_drvdata(master);
>> + struct msm_gpu *gpu = priv->gpu;
>
> << snip >>
>
>> static inline uint32_t get_wptr(struct msm_ringbuffer *ring)
>> {
>> diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
>> index 7e977fec4100..0618da7e8b40 100644
>> --- a/drivers/gpu/drm/msm/msm_drv.c
>> +++ b/drivers/gpu/drm/msm/msm_drv.c
>> @@ -998,18 +998,30 @@ static const struct of_device_id msm_gpu_match[] = {
>> { },
>> };
>>
>> +static const struct of_device_id msm_gmu_match[] = {
>> + { .compatible = "qcom,adreno-gmu" },
>> + { .compatible = "qcom,adreno-gmu-wrapper" },
>> + { },
>> +};
>> +
>> static int add_gpu_components(struct device *dev,
>> struct component_match **matchptr)
>> {
>> - struct device_node *np;
>> + struct device_node *np, *gmu;
>>
>> np = of_find_matching_node(NULL, msm_gpu_match);
>> if (!np)
>> return 0;
>>
>> - if (of_device_is_available(np) && adreno_has_gpu(np))
>> + if (of_device_is_available(np) && adreno_has_gpu(np)) {
>> drm_of_component_match_add(dev, matchptr, component_compare_of, np);
>>
>> + gmu = of_find_matching_node(NULL, msm_gmu_match);
>
> Instead of this, we can probably use the gmu phandle from "qcom,gmu"
> property? That is quicker and also doesn't assume that there is only a
> single GPU.
Ack you're right, let's do this since we have the GPU node already.
>
>> + if (of_device_is_available(gmu))
>> + drm_of_component_match_add(dev, matchptr, component_compare_of, gmu);
>> + of_node_put(gmu);
> I think you missed the recently added headless support. Please check
> separate_gpu_kms modparam and msm_gpu_probe().
I saw it but seems I probably forgot the check if it's still functional,
will double check.
Thanks,
Neil
>
> -Akhil
>
>> + }
>> +
>> of_node_put(np);
>>
>> return 0;
>>
>> ---
>> base-commit: 211ddde0823f1442e4ad052a2f30f050145ccada
>> change-id: 20251022-topic-adreno-attach-gmu-to-driver-e47025fd7ebb
>>
>> Best regards,
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH RFC RFT] drm/msm: adreno: attach the GMU device to a driver
2025-10-29 10:25 ` Neil Armstrong
@ 2025-11-04 1:30 ` Jens Reidel
2025-11-04 12:43 ` Neil Armstrong
0 siblings, 1 reply; 13+ messages in thread
From: Jens Reidel @ 2025-11-04 1:30 UTC (permalink / raw)
To: Neil Armstrong, Rob Clark, Sean Paul, Konrad Dybcio,
Dmitry Baryshkov, Abhinav Kumar, Jessica Zhang, Marijn Suijten,
David Airlie, Simona Vetter
Cc: linux-arm-msm, dri-devel, freedreno, linux-kernel
Hi Neil,
On 10/29/25 11:25 AM, Neil Armstrong wrote:
> Hi,
>
> On 10/26/25 02:31, Jens Reidel wrote:
>> On 10/22/25 14:44, Neil Armstrong wrote:
>>> Due to the sync_state is enabled by default in pmdomain & CCF since
>>> v6.17,
>>> the GCC and GPUCC sync_state would stay pending, leaving the
>>> resources in
>>> full performance:
>>> gcc-x1e80100 100000.clock-controller: sync_state() pending due to
>>> 3d6a000.gmu
>>> gpucc-x1e80100 3d90000.clock-controller: sync_state() pending due to
>>> 3d6a000.gmu
>>>
>>> In order to fix this state and allow the GMU to be properly
>>> probed, let's add a proper driver for the GMU and add it to
>>> the MSM driver components.
>>>
>>> Only the proper GMU has been tested since I don't have
>>> access to hardware with a GMU wrapper.
>>>
>>> Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
>>> ---
>>> drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 354 +++++++++++++
>>> +---------------
>>> drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 6 -
>>> drivers/gpu/drm/msm/adreno/a6xx_gpu.h | 3 -
>>> drivers/gpu/drm/msm/adreno/adreno_device.c | 4 +
>>> drivers/gpu/drm/msm/adreno/adreno_gpu.h | 4 +
>>> drivers/gpu/drm/msm/msm_drv.c | 16 +-
>>> 6 files changed, 192 insertions(+), 195 deletions(-)
>>>
>
> <snip>
>
>>>
>>> ---
>>> base-commit: 211ddde0823f1442e4ad052a2f30f050145ccada
>>> change-id: 20251022-topic-adreno-attach-gmu-to-driver-e47025fd7ebb
>>>
>>> Best regards,
>>
>> Hi Neil,
>>
>> thanks for the patch. With it applied, my GPU fails to initialize.
>> Here's the related dmesg section:
>>
>> [ 1.733062] [drm:dpu_kms_hw_init:1173] dpu hardware
>> revision:0x50020000
>> [ 1.735229] [drm] Initialized msm 1.13.0 for ae01000.display-
>> controller on minor 0
>> [ 1.735403] msm_dpu ae01000.display-controller:
>> [drm:adreno_request_fw] loaded qcom/a630_sqe.fw from new location
>> [ 1.735513] msm_dpu ae01000.display-controller:
>> [drm:adreno_request_fw] loaded qcom/a630_gmu.bin from new location
>> [ 1.746710] a6xx_gmu 506a000.gmu: [drm:a6xx_gmu_set_oob] *ERROR*
>> Timeout waiting for GMU OOB set BOOT_SLUMBER: 0x800000
>> [ 1.746766] msm_dpu ae01000.display-controller:
>> [drm:adreno_load_gpu] *ERROR* Couldn't power up the GPU: -110
>>
>> This could be because I have an Adreno 630-family GPU, which is marked
>> as legacy in a6xx_gmu_init / a6xx_gmu_bind. Previously, the rest of
>> the init code would just always run, while now, some parts are
>> conditionally disabled for legacy GPUs - that may be unintentional?
>> However, unconditionally enabling those parts seems to fail to
>> initialize the GPU followed by a reset shortly after, so there's
>> probably more to this.
>>
>> Please let me know if there's anything I can do to help debug this.
>
> Thanks for the report, it's an sdm845 based right ?
Almost, it's SM7150 with Adreno 618.
>
> I may have mismatched the role of the legacy parameter...
>
> Could you try this on top:
<snip>
> ===========================><=====================================
This is about what I had already tried earlier. I wasn't able to grab a
log from
UART to see what exactly was still wrong back then, but I finally got
around to it today.
Short excerpt from decoded stacktrace:
[ 4.838573] Unable to handle kernel paging request at virtual address
0000000000023010
[ 4.846726] Mem abort info:
[ 4.857916] ESR = 0x0000000096000044
[ 4.870865] EC = 0x25: DABT (current EL), IL = 32 bits
[ 4.883897] SET = 0, FnV = 0
[ 4.895344] EA = 0, S1PTW = 0
[ 4.898584] FSC = 0x04: level 0 translation fault
[ 4.898586] Data abort info:
[ 4.898587] ISV = 0, ISS = 0x00000044, ISS2 = 0x00000000
[ 4.898589] CM = 0, WnR = 1, TnD = 0, TagAccess = 0
[ 4.898591] GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
[ 4.898593] [0000000000023010] user address but active_mm is swapper
[ 4.898597] Internal error: Oops: 0000000096000044 [#1] SMP
[ 4.898600] Modules linked in:
[ 4.898612] Tainted: [W]=WARN
[ 4.898613] Hardware name: xiaomi Xiaomi POCO X3 NFC (Huaxing)/Xiaomi
POCO X3 NFC (Huaxing), BIOS 2025.10-gcb980be18336 10/01/2025
[ 4.898616] Workqueue: events_unbound deferred_probe_work_func
[ 4.911316]
[ 4.911318] pstate: 80400005 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS
BTYPE=--)
[ 4.911321] pc : a6xx_gmu_rpmh_init (arch/arm64/include/asm/io.h:43
include/asm-generic/io.h:293 drivers/gpu/drm/msm/adreno/a6xx_gmu.h:183
drivers/gpu/drm/msm/adreno/a6xx_gmu.c:621)
[ 4.911327] lr : a6xx_gmu_rpmh_init
(drivers/gpu/drm/msm/adreno/a6xx_gmu.c:1811)
[ 4.911331] sp : ffff8000809f3560
[ 4.911332] x29: ffff8000809f3560 x28: 0000000000000001
[ 4.919938] x27: ffff800081e50000
[ 4.919940] x26: 0000000000000300 x25: 0068000000000413 x24:
ffffc51d5cca9000
[ 4.919944] x23: 0000000000030090 x22: ffff000080aec3b0 x21:
ffff00008162c010
[ 4.919947] x20: ffff000080aec578 x19: ffff800081f90000 x18:
000000000aa663d1
[ 4.919950] x17: ffffc51d5cefc000 x16: ffffc51d5cca9d80 x15:
0078000000000f13
[ 4.930595]
[ 4.930596] x14: 0000000000000000 x13: ffff800081f9ffff x12:
ffff800081f9ffff
[ 4.930600] x11: 0000000001000000 x10: 0000000000023010 x9 :
0000000000000000
[ 4.930603] x8 : 0000000000000000 x7 : ffff00008155a960 x6 :
0000000000000000
[ 4.930606] x5 : 0000000000000cc0 x4 : 0000000000001000 x3 :
007800000b49ff13
[ 4.930610] x2 : 000000000b4a0000
[ 4.942943] x1 : ffff800081fa0000 x0 : ffff800081e50000
[ 4.942947] Call trace:
[ 4.942948] a6xx_gmu_rpmh_init (arch/arm64/include/asm/io.h:43
include/asm-generic/io.h:293 drivers/gpu/drm/msm/adreno/a6xx_gmu.h:183
drivers/gpu/drm/msm/adreno/a6xx_gmu.c:621) (P)
[ 4.942954] a6xx_gmu_bind (drivers/gpu/drm/msm/adreno/a6xx_gmu.c:2102)
[ 4.942957] component_bind_all (drivers/base/component.c:660)
[ 4.956709] msm_drm_init (drivers/gpu/drm/msm/msm_drv.c:159)
[ 4.956714] msm_drm_bind (drivers/gpu/drm/msm/msm_drv.c:1032)
Turns out that previously, gmu->mmio was assigned before setting
gmu->rscc = gmu->mmio + 0x23000;
With your changes, the order is now wrong.
Moving the assignment up again (and applying the diff you shared
for proper handling of legacy parameter) fixes it:
==========================================
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -2027,6 +2027,13 @@ static int a6xx_gmu_bind(struct device *dev,
struct device *master, void *data)
if (ret)
goto err_memory;
+ /* Map the GMU registers */
+ gmu->mmio = a6xx_gmu_get_mmio(pdev, "gmu");
+ if (IS_ERR(gmu->mmio)) {
+ ret = PTR_ERR(gmu->mmio);
+ goto err_memory;
+ }
+
if (adreno_is_a650_family(adreno_gpu) ||
adreno_is_a7xx(adreno_gpu)) {
gmu->rscc = a6xx_gmu_get_mmio(pdev, "rscc");
@@ -2048,13 +2055,6 @@ static int a6xx_gmu_bind(struct device *dev,
struct device *master, void *data)
}
}
- /* Map the GMU registers */
- gmu->mmio = a6xx_gmu_get_mmio(pdev, "gmu");
- if (IS_ERR(gmu->mmio)) {
- ret = PTR_ERR(gmu->mmio);
- goto err_memory;
- }
-
gmu->cxpd = dev_pm_domain_attach_by_name(gmu->dev, "cx");
if (IS_ERR(gmu->cxpd)) {
ret = PTR_ERR(gmu->cxpd);
==========================================
This almost certainly isn't correct either because the wrapper needs
its registers mapped too, perhaps this is better suited for moving it
above the if block, I think that makes more sense.
With the legacy parameter changes and GMU register mapping prior to RSCC
offset calculation:

Tested-by: Jens Reidel <adrian@mainlining.org> # SM7150

Best regards,
Jens
>
> Thanks,
> Neil
>
>>
>> Best regards,
>> Jens
>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH RFC RFT] drm/msm: adreno: attach the GMU device to a driver
2025-11-04 1:30 ` Jens Reidel
@ 2025-11-04 12:43 ` Neil Armstrong
0 siblings, 0 replies; 13+ messages in thread
From: Neil Armstrong @ 2025-11-04 12:43 UTC (permalink / raw)
To: Jens Reidel, Rob Clark, Sean Paul, Konrad Dybcio,
Dmitry Baryshkov, Abhinav Kumar, Jessica Zhang, Marijn Suijten,
David Airlie, Simona Vetter
Cc: linux-arm-msm, dri-devel, freedreno, linux-kernel
On 11/4/25 02:30, Jens Reidel wrote:
> Hi Neil,
>
> On 10/29/25 11:25 AM, Neil Armstrong wrote:
>> Hi,
>>
>> On 10/26/25 02:31, Jens Reidel wrote:
>>> On 10/22/25 14:44, Neil Armstrong wrote:
>>>> Due to the sync_state is enabled by default in pmdomain & CCF since v6.17,
>>>> the GCC and GPUCC sync_state would stay pending, leaving the resources in
>>>> full performance:
>>>> gcc-x1e80100 100000.clock-controller: sync_state() pending due to 3d6a000.gmu
>>>> gpucc-x1e80100 3d90000.clock-controller: sync_state() pending due to 3d6a000.gmu
>>>>
>>>> In order to fix this state and allow the GMU to be properly
>>>> probed, let's add a proper driver for the GMU and add it to
>>>> the MSM driver components.
>>>>
>>>> Only the proper GMU has been tested since I don't have
>>>> access to hardware with a GMU wrapper.
>>>>
>>>> Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
>>>> ---
>>>> drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 354 +++++++++++++ +---------------
>>>> drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 6 -
>>>> drivers/gpu/drm/msm/adreno/a6xx_gpu.h | 3 -
>>>> drivers/gpu/drm/msm/adreno/adreno_device.c | 4 +
>>>> drivers/gpu/drm/msm/adreno/adreno_gpu.h | 4 +
>>>> drivers/gpu/drm/msm/msm_drv.c | 16 +-
>>>> 6 files changed, 192 insertions(+), 195 deletions(-)
>>>>
>>
>> <snip>
>>
>>>>
>>>> ---
>>>> base-commit: 211ddde0823f1442e4ad052a2f30f050145ccada
>>>> change-id: 20251022-topic-adreno-attach-gmu-to-driver-e47025fd7ebb
>>>>
>>>> Best regards,
>>>
>>> Hi Neil,
>>>
>>> thanks for the patch. With it applied, my GPU fails to initialize.
>>> Here's the related dmesg section:
>>>
>>> [ 1.733062] [drm:dpu_kms_hw_init:1173] dpu hardware revision:0x50020000
>>> [ 1.735229] [drm] Initialized msm 1.13.0 for ae01000.display- controller on minor 0
>>> [ 1.735403] msm_dpu ae01000.display-controller: [drm:adreno_request_fw] loaded qcom/a630_sqe.fw from new location
>>> [ 1.735513] msm_dpu ae01000.display-controller: [drm:adreno_request_fw] loaded qcom/a630_gmu.bin from new location
>>> [ 1.746710] a6xx_gmu 506a000.gmu: [drm:a6xx_gmu_set_oob] *ERROR* Timeout waiting for GMU OOB set BOOT_SLUMBER: 0x800000
>>> [ 1.746766] msm_dpu ae01000.display-controller: [drm:adreno_load_gpu] *ERROR* Couldn't power up the GPU: -110
>>>
>>> This could be because I have an Adreno 630-family GPU, which is marked as legacy in a6xx_gmu_init / a6xx_gmu_bind. Previously, the rest of the init code would just always run, while now, some parts are conditionally disabled for legacy GPUs - that may be unintentional? However, unconditionally enabling those parts seems to fail to initialize the GPU followed by a reset shortly after, so there's probably more to this.
>>>
>>> Please let me know if there's anything I can do to help debug this.
>>
>> Thanks for the report, it's an sdm845 based right ?
>
> Almost, it's SM7150 with Adreno 618.
>
>>
>> I may have mismatched the role of the legacy parameter...
>>
>> Could you try this on top:
>
> <snip>
>
>> ===========================><=====================================
>
> This is about what I had already tried earlier. I wasn't able to grab a log from
> UART to see what exactly was still wrong back then, but I finally got around to it today.
>
> Short excerpt from decoded stacktrace:
>
> [ 4.838573] Unable to handle kernel paging request at virtual address 0000000000023010
> [ 4.846726] Mem abort info:
> [ 4.857916] ESR = 0x0000000096000044
> [ 4.870865] EC = 0x25: DABT (current EL), IL = 32 bits
> [ 4.883897] SET = 0, FnV = 0
> [ 4.895344] EA = 0, S1PTW = 0
> [ 4.898584] FSC = 0x04: level 0 translation fault
> [ 4.898586] Data abort info:
> [ 4.898587] ISV = 0, ISS = 0x00000044, ISS2 = 0x00000000
> [ 4.898589] CM = 0, WnR = 1, TnD = 0, TagAccess = 0
> [ 4.898591] GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
> [ 4.898593] [0000000000023010] user address but active_mm is swapper
> [ 4.898597] Internal error: Oops: 0000000096000044 [#1] SMP
> [ 4.898600] Modules linked in:
> [ 4.898612] Tainted: [W]=WARN
> [ 4.898613] Hardware name: xiaomi Xiaomi POCO X3 NFC (Huaxing)/Xiaomi POCO X3 NFC (Huaxing), BIOS 2025.10-gcb980be18336 10/01/2025
> [ 4.898616] Workqueue: events_unbound deferred_probe_work_func
> [ 4.911316]
> [ 4.911318] pstate: 80400005 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
> [ 4.911321] pc : a6xx_gmu_rpmh_init (arch/arm64/include/asm/io.h:43 include/asm-generic/io.h:293 drivers/gpu/drm/msm/adreno/a6xx_gmu.h:183 drivers/gpu/drm/msm/adreno/a6xx_gmu.c:621)
> [ 4.911327] lr : a6xx_gmu_rpmh_init (drivers/gpu/drm/msm/adreno/a6xx_gmu.c:1811)
> [ 4.911331] sp : ffff8000809f3560
> [ 4.911332] x29: ffff8000809f3560 x28: 0000000000000001
> [ 4.919938] x27: ffff800081e50000
> [ 4.919940] x26: 0000000000000300 x25: 0068000000000413 x24: ffffc51d5cca9000
> [ 4.919944] x23: 0000000000030090 x22: ffff000080aec3b0 x21: ffff00008162c010
> [ 4.919947] x20: ffff000080aec578 x19: ffff800081f90000 x18: 000000000aa663d1
> [ 4.919950] x17: ffffc51d5cefc000 x16: ffffc51d5cca9d80 x15: 0078000000000f13
> [ 4.930595]
> [ 4.930596] x14: 0000000000000000 x13: ffff800081f9ffff x12: ffff800081f9ffff
> [ 4.930600] x11: 0000000001000000 x10: 0000000000023010 x9 : 0000000000000000
> [ 4.930603] x8 : 0000000000000000 x7 : ffff00008155a960 x6 : 0000000000000000
> [ 4.930606] x5 : 0000000000000cc0 x4 : 0000000000001000 x3 : 007800000b49ff13
> [ 4.930610] x2 : 000000000b4a0000
> [ 4.942943] x1 : ffff800081fa0000 x0 : ffff800081e50000
> [ 4.942947] Call trace:
> [ 4.942948] a6xx_gmu_rpmh_init (arch/arm64/include/asm/io.h:43 include/asm-generic/io.h:293 drivers/gpu/drm/msm/adreno/a6xx_gmu.h:183 drivers/gpu/drm/msm/adreno/a6xx_gmu.c:621) (P)
> [ 4.942954] a6xx_gmu_bind (drivers/gpu/drm/msm/adreno/a6xx_gmu.c:2102)
> [ 4.942957] component_bind_all (drivers/base/component.c:660)
> [ 4.956709] msm_drm_init (drivers/gpu/drm/msm/msm_drv.c:159)
> [ 4.956714] msm_drm_bind (drivers/gpu/drm/msm/msm_drv.c:1032)
>
> Turns out that previously, gmu->mmio was assigned before setting
> gmu->rscc = gmu->mmio + 0x23000;
> With your changes, the order is now wrong.
Oh crap
> Moving the assignment up again (and applying the diff you shared
> for proper handling of legacy parameter) fixes it:
>
> ==========================================
> --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
> @@ -2027,6 +2027,13 @@ static int a6xx_gmu_bind(struct device *dev, struct device *master, void *data)
> if (ret)
> goto err_memory;
>
> + /* Map the GMU registers */
> + gmu->mmio = a6xx_gmu_get_mmio(pdev, "gmu");
> + if (IS_ERR(gmu->mmio)) {
> + ret = PTR_ERR(gmu->mmio);
> + goto err_memory;
> + }
> +
> if (adreno_is_a650_family(adreno_gpu) ||
> adreno_is_a7xx(adreno_gpu)) {
> gmu->rscc = a6xx_gmu_get_mmio(pdev, "rscc");
> @@ -2048,13 +2055,6 @@ static int a6xx_gmu_bind(struct device *dev, struct device *master, void *data)
> }
> }
>
> - /* Map the GMU registers */
> - gmu->mmio = a6xx_gmu_get_mmio(pdev, "gmu");
> - if (IS_ERR(gmu->mmio)) {
> - ret = PTR_ERR(gmu->mmio);
> - goto err_memory;
> - }
> -
> gmu->cxpd = dev_pm_domain_attach_by_name(gmu->dev, "cx");
> if (IS_ERR(gmu->cxpd)) {
> ret = PTR_ERR(gmu->cxpd);
> ==========================================
>
> This almost certainly isn't correct either because the wrapper needs
> its registers mapped too, perhaps this is better suited for moving it
> above the if block, I think that makes more sense.
Yes, merging both functions was a bad move...
>
> With the legacy parameter changes and GMU register mapping prior to RSCC
> offset calculation:
>
> Tested-by: Jens Reidel <adrian@mainlining.org> # SM7150
Thanks for testing !
Following Akhil's review, I'll probably keep the wrapper and normal
gmu bind/unbind separated for the first step.
Neil
>
> Best regards,
> Jens
>>
>> Thanks,
>> Neil
>>
>>>
>>> Best regards,
>>> Jens
>>
^ permalink raw reply [flat|nested] 13+ messages in thread
end of thread, other threads:[~2025-11-04 12:43 UTC | newest]
Thread overview: 13+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-10-22 12:44 [PATCH RFC RFT] drm/msm: adreno: attach the GMU device to a driver Neil Armstrong
2025-10-22 17:09 ` Konrad Dybcio
2025-10-23 8:27 ` Neil Armstrong
2025-10-24 8:55 ` Konrad Dybcio
2025-10-24 9:11 ` Neil Armstrong
2025-10-30 9:52 ` Konrad Dybcio
2025-10-30 10:02 ` Neil Armstrong
2025-10-26 1:31 ` Jens Reidel
2025-10-29 10:25 ` Neil Armstrong
2025-11-04 1:30 ` Jens Reidel
2025-11-04 12:43 ` Neil Armstrong
2025-10-30 21:29 ` Akhil P Oommen
2025-10-31 8:38 ` Neil Armstrong
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).