[PATCH] drm/amdgpu: Fix CP_MEC_MDBASE in multi-xcc for gfx v12_1

AMD-GFX Archive on lore.kernel.org
 help / color / mirror / Atom feed

From: Alex Deucher <alexander.deucher@amd.com>
To: <amd-gfx@lists.freedesktop.org>
Cc: Michael Chen <michael.chen@amd.com>,
	Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>,
	Shaoyun.liu <Shaoyun.liu@amd.com>,
	"Alex Deucher" <alexander.deucher@amd.com>
Subject: [PATCH] drm/amdgpu: Fix CP_MEC_MDBASE in multi-xcc for gfx v12_1
Date: Wed, 10 Dec 2025 02:14:04 -0500	[thread overview]
Message-ID: <20251210071415.19983-10-alexander.deucher@amd.com> (raw)
In-Reply-To: <20251210071415.19983-1-alexander.deucher@amd.com>

From: Michael Chen <michael.chen@amd.com>

Allocate a separate MEC firmware data region for each XCC, and program
CP_MEC_MDBASE on every XCC to point at that XCC's own region.

Signed-off-by: Michael Chen <michael.chen@amd.com>
Acked-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Reviewed-by: Shaoyun.liu <Shaoyun.liu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c | 192 +++++++++++++------------
 1 file changed, 98 insertions(+), 94 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
index 61ffba9a252bd..347912596a1b0 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
@@ -1887,20 +1887,18 @@ static void gfx_v12_1_xcc_cp_compute_enable(struct amdgpu_device *adev,
 }
 
 static int gfx_v12_1_xcc_cp_compute_load_microcode_rs64(struct amdgpu_device *adev,
-							int xcc_id)
+							uint16_t xcc_mask)
 {
 	const struct gfx_firmware_header_v2_0 *mec_hdr;
 	const __le32 *fw_ucode, *fw_data;
 	u32 tmp, fw_ucode_size, fw_data_size;
 	u32 i, usec_timeout = 50000; /* Wait for 50 ms */
 	u32 *fw_ucode_ptr, *fw_data_ptr;
-	int r;
+	int r, xcc_id;
 
 	if (!adev->gfx.mec_fw)
 		return -EINVAL;
 
-	gfx_v12_1_xcc_cp_compute_enable(adev, false, xcc_id);
-
 	mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data;
 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
 
@@ -1925,7 +1923,7 @@ static int gfx_v12_1_xcc_cp_compute_load_microcode_rs64(struct amdgpu_device *ad
 
 	r = amdgpu_bo_create_reserved(adev,
 				      ALIGN(fw_data_size, 64 * 1024) *
-				      adev->gfx.mec.num_pipe_per_mec,
+				      adev->gfx.mec.num_pipe_per_mec * NUM_XCC(xcc_mask),
 				      64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
 				      &adev->gfx.mec.mec_fw_data_obj,
 				      &adev->gfx.mec.mec_fw_data_gpu_addr,
@@ -1937,8 +1935,12 @@ static int gfx_v12_1_xcc_cp_compute_load_microcode_rs64(struct amdgpu_device *ad
 	}
 
 	memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size);
-	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
-		memcpy(fw_data_ptr + i * ALIGN(fw_data_size, 64 * 1024) / 4, fw_data, fw_data_size);
+	for_each_inst(xcc_id, xcc_mask) {
+		for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
+			u32 offset = (xcc_id * adev->gfx.mec.num_pipe_per_mec + i) *
+				     ALIGN(fw_data_size, 64 * 1024) / 4;
+			memcpy(fw_data_ptr + offset, fw_data, fw_data_size);
+		}
 	}
 
 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
@@ -1946,75 +1948,81 @@ static int gfx_v12_1_xcc_cp_compute_load_microcode_rs64(struct amdgpu_device *ad
 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj);
 
-	tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_CNTL);
-	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
-	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
-	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
-	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_CNTL, tmp);
-
-	tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DC_BASE_CNTL);
-	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
-	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
-	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DC_BASE_CNTL, tmp);
-
-	mutex_lock(&adev->srbm_mutex);
-	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
-		soc_v1_0_grbm_select(adev, 1, i, 0, 0, GET_INST(GC, xcc_id));
+	for_each_inst(xcc_id, xcc_mask) {
+		gfx_v12_1_xcc_cp_compute_enable(adev, false, xcc_id);
 
-		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_MDBASE_LO,
-			     lower_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr +
-					   i * ALIGN(fw_data_size, 64 * 1024)));
-		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_MDBASE_HI,
-			     upper_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr +
-					   i * ALIGN(fw_data_size, 64 * 1024)));
+		tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_CNTL);
+		tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
+		tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
+		tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
+		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_CNTL, tmp);
 
-		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_LO,
-			     lower_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
-		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_HI,
-			     upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
-	}
-	mutex_unlock(&adev->srbm_mutex);
-	soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, 0));
+		tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DC_BASE_CNTL);
+		tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
+		tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
+		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DC_BASE_CNTL, tmp);
 
-	/* Trigger an invalidation of the L1 instruction caches */
-	tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DC_OP_CNTL);
-	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
-	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DC_OP_CNTL, tmp);
+		mutex_lock(&adev->srbm_mutex);
+		for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
+			soc_v1_0_grbm_select(adev, 1, i, 0, 0, GET_INST(GC, xcc_id));
+
+			WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_MDBASE_LO,
+					lower_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr +
+									(xcc_id * adev->gfx.mec.num_pipe_per_mec + i) *
+									ALIGN(fw_data_size, 64 * 1024)));
+			WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_MDBASE_HI,
+					upper_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr +
+									(xcc_id * adev->gfx.mec.num_pipe_per_mec + i) *
+									ALIGN(fw_data_size, 64 * 1024)));
+
+			WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_LO,
+					lower_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
+			WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_HI,
+					upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
+		}
+		mutex_unlock(&adev->srbm_mutex);
+		soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, 0));
 
-	/* Wait for invalidation complete */
-	for (i = 0; i < usec_timeout; i++) {
+		/* Trigger an invalidation of the L1 instruction caches */
 		tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DC_OP_CNTL);
-		if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
-				       INVALIDATE_DCACHE_COMPLETE))
-			break;
-		udelay(1);
-	}
-
-	if (i >= usec_timeout) {
-		dev_err(adev->dev, "failed to invalidate instruction cache\n");
-		return -EINVAL;
-	}
+		tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
+		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DC_OP_CNTL, tmp);
+
+		/* Wait for invalidation complete */
+		for (i = 0; i < usec_timeout; i++) {
+			tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DC_OP_CNTL);
+			if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
+						INVALIDATE_DCACHE_COMPLETE))
+				break;
+			udelay(1);
+		}
 
-	/* Trigger an invalidation of the L1 instruction caches */
-	tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_OP_CNTL);
-	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
-	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_OP_CNTL, tmp);
+		if (i >= usec_timeout) {
+			dev_err(adev->dev, "failed to invalidate instruction cache\n");
+			return -EINVAL;
+		}
 
-	/* Wait for invalidation complete */
-	for (i = 0; i < usec_timeout; i++) {
+		/* Trigger an invalidation of the L1 instruction caches */
 		tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_OP_CNTL);
-		if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
-				       INVALIDATE_CACHE_COMPLETE))
-			break;
-		udelay(1);
-	}
+		tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_OP_CNTL, tmp);
+
+		/* Wait for invalidation complete */
+		for (i = 0; i < usec_timeout; i++) {
+			tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_OP_CNTL);
+			if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
+						INVALIDATE_CACHE_COMPLETE))
+				break;
+			udelay(1);
+		}
 
-	if (i >= usec_timeout) {
-		dev_err(adev->dev, "failed to invalidate instruction cache\n");
-		return -EINVAL;
-	}
+		if (i >= usec_timeout) {
+			dev_err(adev->dev, "failed to invalidate instruction cache\n");
+			return -EINVAL;
+		}
 
-	gfx_v12_1_xcc_set_mec_ucode_start_addr(adev, xcc_id);
+		gfx_v12_1_xcc_set_mec_ucode_start_addr(adev, xcc_id);
+	}
 
 	return 0;
 }
@@ -2411,42 +2419,43 @@ static int gfx_v12_1_xcc_kcq_resume(struct amdgpu_device *adev,
 	return r;
 }
 
-static int gfx_v12_1_xcc_cp_resume(struct amdgpu_device *adev,
-				   int xcc_id)
+static int gfx_v12_1_xcc_cp_resume(struct amdgpu_device *adev, uint16_t xcc_mask)
 {
-	int r, i;
+	int r, i, xcc_id;
 	struct amdgpu_ring *ring;
 
-	if (!(adev->flags & AMD_IS_APU))
-		gfx_v12_1_xcc_enable_gui_idle_interrupt(adev, false, xcc_id);
-
 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
 		/* legacy firmware loading */
-		r = gfx_v12_1_xcc_cp_compute_load_microcode_rs64(adev, xcc_id);
+		r = gfx_v12_1_xcc_cp_compute_load_microcode_rs64(adev, xcc_mask);
 		if (r)
 			return r;
 	}
 
-	gfx_v12_1_xcc_cp_set_doorbell_range(adev, xcc_id);
+	for_each_inst(xcc_id, xcc_mask) {
+		if (!(adev->flags & AMD_IS_APU))
+			gfx_v12_1_xcc_enable_gui_idle_interrupt(adev, false, xcc_id);
 
-	gfx_v12_1_xcc_cp_compute_enable(adev, true, xcc_id);
+		gfx_v12_1_xcc_cp_set_doorbell_range(adev, xcc_id);
 
-	if (adev->enable_mes_kiq && adev->mes.kiq_hw_init)
-		r = amdgpu_mes_kiq_hw_init(adev, xcc_id);
-	else
-		r = gfx_v12_1_xcc_kiq_resume(adev, xcc_id);
-	if (r)
-		return r;
+		gfx_v12_1_xcc_cp_compute_enable(adev, true, xcc_id);
 
-	r = gfx_v12_1_xcc_kcq_resume(adev, xcc_id);
-	if (r)
-		return r;
+		if (adev->enable_mes_kiq && adev->mes.kiq_hw_init)
+			r = amdgpu_mes_kiq_hw_init(adev, xcc_id);
+		else
+			r = gfx_v12_1_xcc_kiq_resume(adev, xcc_id);
+		if (r)
+			return r;
 
-	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-		ring = &adev->gfx.compute_ring[i + xcc_id * adev->gfx.num_compute_rings];
-		r = amdgpu_ring_test_helper(ring);
+		r = gfx_v12_1_xcc_kcq_resume(adev, xcc_id);
 		if (r)
 			return r;
+
+		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+			ring = &adev->gfx.compute_ring[i + xcc_id * adev->gfx.num_compute_rings];
+			r = amdgpu_ring_test_helper(ring);
+			if (r)
+				return r;
+		}
 	}
 
 	return 0;
@@ -3923,14 +3932,9 @@ static int gfx_v12_1_xcp_resume(void *handle, uint32_t inst_mask)
 		}
 	}
 
-	tmp_mask = inst_mask;
-	for_each_inst(i, tmp_mask) {
-		r = gfx_v12_1_xcc_cp_resume(adev, i);
-		if (r)
-			return r;
-	}
+	r = gfx_v12_1_xcc_cp_resume(adev, inst_mask);
 
-	return 0;
+	return r;
 }
 
 static int gfx_v12_1_xcp_suspend(void *handle, uint32_t inst_mask)
-- 
2.52.0

next prev parent reply	other threads:[~2025-12-10  7:14 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-12-10  7:13 [PATCH] drm/amdgpu: Flush TLB on all XCCs on GFX 12.1 Alex Deucher
2025-12-10  7:13 ` [PATCH] drm/amdgpu: Add soc v1_0 ih client id table Alex Deucher
2025-12-10  7:13 ` [PATCH] drm/amdkfd: Update CWSR area calculations for GFX 12.1 Alex Deucher
2025-12-10  7:13 ` [PATCH] drm/amdgpu: Fix CU info " Alex Deucher
2025-12-10  7:13 ` [PATCH] drm/amdgpu: init RS64_MEC_P2/P3_STACK for gfx12.1 Alex Deucher
2025-12-10  7:14 ` [PATCH] drm/amdgpu: Enable 5-level page table for GFX 12.1.0 Alex Deucher
2025-12-10  7:14 ` [PATCH] drm/amdkfd: Update LDS, Scratch base for 57bit address Alex Deucher
2025-12-10  7:14 ` [PATCH] drm/amdgpu: Add pde3 table invalidation request for GFX 12.1.0 Alex Deucher
2025-12-10  7:14 ` [PATCH] drm/amdgpu: Support 57bit fault address " Alex Deucher
2025-12-10  7:14 ` Alex Deucher [this message]
2025-12-10  7:14 ` [PATCH] drm/amdgpu: Correct xcc_id input to GET_INST from physical to logic Alex Deucher
2025-12-10  7:14 ` [PATCH] drm/amdgpu: use physical xcc id to get rrmt Alex Deucher
2025-12-10  7:14 ` [PATCH] drm/amdgpu: Correct inst_id input from physical to logic Alex Deucher
2025-12-10  7:14 ` [PATCH] drm/amdgpu: support xcc harvest for ih translate Alex Deucher
2025-12-10  7:14 ` [PATCH] drm/amdgpu: normalize reg addr as local xcc for gfx v12_1 Alex Deucher
2025-12-10  7:14 ` [PATCH] drm/amdgpu/mes_v12_1: fix mes access xcd register Alex Deucher
2025-12-10  7:14 ` [PATCH] drm/amdgpu: add gfx sysfs support for gfx_v12_1 Alex Deucher
2025-12-10  7:14 ` [PATCH] drm/amdgpu: correct rlc autoload for xcc harvest Alex Deucher
2025-12-10  7:14 ` [PATCH] drm/amdkfd: Override KFD SVM mappings for GFX 12.1 Alex Deucher
2025-12-10  7:14 ` [PATCH] drm/amdgpu: Add gfx v12_1 interrupt source header Alex Deucher

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:61ffba9a252b dfblob:347912596a1b )
 OR (
bs:"[PATCH] drm/amdgpu: Fix CP_MEC_MDBASE in multi-xcc for gfx v12_1" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251210071415.19983-10-alexander.deucher@amd.com \
    --to=alexander.deucher@amd.com \
    --cc=Harish.Kasiviswanathan@amd.com \
    --cc=Shaoyun.liu@amd.com \
    --cc=amd-gfx@lists.freedesktop.org \
    --cc=michael.chen@amd.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox