* [PATCH] drm/amdgpu: Add soc v1_0 ih client id table
2025-12-10 7:13 [PATCH] drm/amdgpu: Flush TLB on all XCCs on GFX 12.1 Alex Deucher
@ 2025-12-10 7:13 ` Alex Deucher
2025-12-10 7:13 ` [PATCH] drm/amdkfd: Update CWSR area calculations for GFX 12.1 Alex Deucher
` (17 subsequent siblings)
18 siblings, 0 replies; 20+ messages in thread
From: Alex Deucher @ 2025-12-10 7:13 UTC (permalink / raw)
To: amd-gfx; +Cc: Hawking Zhang, Likun Gao, Alex Deucher
From: Hawking Zhang <Hawking.Zhang@amd.com>
To accommodate the specific ih client for soc v1_0
Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Likun Gao <Likun.Gao@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 35 +++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h | 1 +
drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c | 6 +--
drivers/gpu/drm/amd/amdgpu/gmc_v12_1.c | 6 +--
drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c | 4 +-
.../drm/amd/include/soc_v1_0_ih_clientid.h | 52 +++++++++++++++++++
6 files changed, 96 insertions(+), 8 deletions(-)
create mode 100644 drivers/gpu/drm/amd/include/soc_v1_0_ih_clientid.h
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 8112ffc85995e..0a1bf61a11320 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -99,6 +99,41 @@ const char *soc15_ih_clientid_name[] = {
"MP1"
};
+const char *soc_v1_0_ih_clientid_name[] = {
+ "IH",
+ "Reserved",
+ "ATHUB",
+ "BIF",
+ "Reserved",
+ "Reserved",
+ "Reserved",
+ "RLC",
+ "Reserved",
+ "Reserved",
+ "GFX",
+ "IMU",
+ "Reserved",
+ "Reserved",
+ "VCN1 or UVD1",
+ "THM",
+ "VCN or UVD",
+ "Reserved",
+ "VMC",
+ "Reserved",
+ "GRBM_CP",
+ "GC_AID",
+ "ROM_SMUIO",
+ "DF",
+ "Reserved",
+ "PWR",
+ "LSDMA",
+ "GC_UTCL2",
+ "nHT",
+ "Reserved",
+ "MP0",
+ "MP1",
+};
+
const int node_id_to_phys_map[NODEID_MAX] = {
[AID0_NODEID] = 0,
[XCD0_NODEID] = 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
index 9f0417456abda..af72405a72262 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
@@ -26,6 +26,7 @@
#include <linux/irqdomain.h>
#include "soc15_ih_clientid.h"
+#include "soc_v1_0_ih_clientid.h"
#include "amdgpu_ih.h"
#define AMDGPU_MAX_IRQ_SRC_ID 0x100
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
index a30fac3d57609..a8f020a375c92 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
@@ -1140,21 +1140,21 @@ static int gfx_v12_1_sw_init(struct amdgpu_ip_block *ip_block)
num_xcc = NUM_XCC(adev->gfx.xcc_mask);
/* EOP Event */
- r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
+ r = amdgpu_irq_add_id(adev, SOC_V1_0_IH_CLIENTID_GRBM_CP,
GFX_11_0_0__SRCID__CP_EOP_INTERRUPT,
&adev->gfx.eop_irq);
if (r)
return r;
/* Privileged reg */
- r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
+ r = amdgpu_irq_add_id(adev, SOC_V1_0_IH_CLIENTID_GRBM_CP,
GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT,
&adev->gfx.priv_reg_irq);
if (r)
return r;
/* Privileged inst */
- r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
+ r = amdgpu_irq_add_id(adev, SOC_V1_0_IH_CLIENTID_GRBM_CP,
GFX_11_0_0__SRCID__CP_PRIV_INSTR_FAULT,
&adev->gfx.priv_inst_irq);
if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_1.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_1.c
index fa46b0089e8dd..19cd38ce57c94 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_1.c
@@ -124,7 +124,7 @@ static int gmc_v12_1_process_interrupt(struct amdgpu_device *adev,
write_fault = !!(entry->src_data[1] & 0x200000);
}
- if (entry->client_id == SOC21_IH_CLIENTID_VMC) {
+ if (entry->client_id == SOC_V1_0_IH_CLIENTID_VMC) {
hub_name = "mmhub0";
vmhub = AMDGPU_MMHUB0(node_id / 4);
} else {
@@ -198,8 +198,8 @@ static int gmc_v12_1_process_interrupt(struct amdgpu_device *adev,
amdgpu_vm_put_task_info(task_info);
}
- dev_err(adev->dev, " in page starting at address 0x%016llx from IH client %d\n",
- addr, entry->client_id);
+ dev_err(adev->dev, " in page starting at address 0x%016llx from IH client %d (%s)\n",
+ addr, entry->client_id, soc_v1_0_ih_clientid_name[entry->client_id]);
if (amdgpu_sriov_vf(adev))
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c
index 753512276e373..446b7527f5c62 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c
@@ -1277,7 +1277,7 @@ static int sdma_v7_1_sw_init(struct amdgpu_ip_block *ip_block)
u32 xcc_id;
/* SDMA trap event */
- r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX,
+ r = amdgpu_irq_add_id(adev, SOC_V1_0_IH_CLIENTID_GFX,
GFX_11_0_0__SRCID__SDMA_TRAP,
&adev->sdma.trap_irq);
if (r)
@@ -1526,7 +1526,7 @@ static int sdma_v7_1_process_trap_irq(struct amdgpu_device *adev,
}
switch (entry->client_id) {
- case SOC21_IH_CLIENTID_GFX:
+ case SOC_V1_0_IH_CLIENTID_GFX:
switch (queue) {
case 0:
amdgpu_fence_process(&adev->sdma.instance[instances].ring);
diff --git a/drivers/gpu/drm/amd/include/soc_v1_0_ih_clientid.h b/drivers/gpu/drm/amd/include/soc_v1_0_ih_clientid.h
new file mode 100644
index 0000000000000..11ab4b7d61e10
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/soc_v1_0_ih_clientid.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __SOC_V1_0_IH_CLIENTID_H__
+#define __SOC_V1_0_IH_CLIENTID_H__
+
+extern const char *soc_v1_0_ih_clientid_name[];
+
+enum soc_v1_0_ih_clientid {
+ SOC_V1_0_IH_CLIENTID_IH = 0x00,
+ SOC_V1_0_IH_CLIENTID_ATHUB = 0x02,
+ SOC_V1_0_IH_CLIENTID_BIF = 0x03,
+ SOC_V1_0_IH_CLIENTID_RLC = 0x07,
+ SOC_V1_0_IH_CLIENTID_GFX = 0x0a,
+ SOC_V1_0_IH_CLIENTID_IMU = 0x0b,
+ SOC_V1_0_IH_CLIENTID_VCN1 = 0x0e,
+ SOC_V1_0_IH_CLIENTID_THM = 0x0f,
+ SOC_V1_0_IH_CLIENTID_VCN = 0x10,
+ SOC_V1_0_IH_CLIENTID_VMC = 0x12,
+ SOC_V1_0_IH_CLIENTID_GRBM_CP = 0x14,
+ SOC_V1_0_IH_CLIENTID_GC_AID = 0x15,
+ SOC_V1_0_IH_CLIENTID_ROM_SMUIO = 0x16,
+ SOC_V1_0_IH_CLIENTID_DF = 0x17,
+ SOC_V1_0_IH_CLIENTID_PWR = 0x19,
+ SOC_V1_0_IH_CLIENTID_LSDMA = 0x1a,
+ SOC_V1_0_IH_CLIENTID_GC_UTCL2 = 0x1b,
+ SOC_V1_0_IH_CLIENTID_nHT = 0X1c,
+ SOC_V1_0_IH_CLIENTID_MP0 = 0x1e,
+ SOC_V1_0_IH_CLIENTID_MP1 = 0x1f,
+ SOC_V1_0_IH_CLIENTID_MAX,
+};
+
+#endif
--
2.52.0
^ permalink raw reply related [flat|nested] 20+ messages in thread* [PATCH] drm/amdkfd: Update CWSR area calculations for GFX 12.1
2025-12-10 7:13 [PATCH] drm/amdgpu: Flush TLB on all XCCs on GFX 12.1 Alex Deucher
2025-12-10 7:13 ` [PATCH] drm/amdgpu: Add soc v1_0 ih client id table Alex Deucher
@ 2025-12-10 7:13 ` Alex Deucher
2025-12-10 7:13 ` [PATCH] drm/amdgpu: Fix CU info " Alex Deucher
` (16 subsequent siblings)
18 siblings, 0 replies; 20+ messages in thread
From: Alex Deucher @ 2025-12-10 7:13 UTC (permalink / raw)
To: amd-gfx; +Cc: Mukul Joshi, Feifei Xu, Alex Deucher
From: Mukul Joshi <mukul.joshi@amd.com>
Update the SGPR, VGPR and HWREG sizes and the number of waves supported
for the GFX 12.1 CWSR memory limits. The CU calculation in topology
changed; as a result, these values need to be updated.
Signed-off-by: Mukul Joshi <mukul.joshi@amd.com>
Reviewed-by: Feifei Xu <Feifei.Xu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
drivers/gpu/drm/amd/amdkfd/kfd_queue.c | 63 ++++++++++++++++++++++----
1 file changed, 54 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
index 80c4fa2b0975d..56c97189e7f12 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
@@ -392,12 +392,20 @@ int kfd_queue_unref_bo_vas(struct kfd_process_device *pdd,
return 0;
}
-#define SGPR_SIZE_PER_CU 0x4000
-#define LDS_SIZE_PER_CU 0x10000
-#define HWREG_SIZE_PER_CU 0x1000
#define DEBUGGER_BYTES_ALIGN 64
#define DEBUGGER_BYTES_PER_WAVE 32
+static u32 kfd_get_sgpr_size_per_cu(u32 gfxv)
+{
+ u32 sgpr_size = 0x4000;
+
+ if (gfxv == 120500 ||
+ gfxv == 120501)
+ sgpr_size = 0x8000;
+
+ return sgpr_size;
+}
+
static u32 kfd_get_vgpr_size_per_cu(u32 gfxv)
{
u32 vgpr_size = 0x40000;
@@ -413,14 +421,53 @@ static u32 kfd_get_vgpr_size_per_cu(u32 gfxv)
gfxv == 120000 || /* GFX_VERSION_GFX1200 */
gfxv == 120001) /* GFX_VERSION_GFX1201 */
vgpr_size = 0x60000;
+ else if (gfxv == 120500 || /* GFX_VERSION_GFX1250 */
+ gfxv == 120501) /* GFX_VERSION_GFX1251 */
+ vgpr_size = 0x80000;
return vgpr_size;
}
+static u32 kfd_get_hwreg_size_per_cu(u32 gfxv)
+{
+ u32 hwreg_size = 0x1000;
+
+ if (gfxv == 120500 || gfxv == 120501)
+ hwreg_size = 0x8000;
+
+ return hwreg_size;
+}
+
+static u32 kfd_get_lds_size_per_cu(u32 gfxv, struct kfd_node_properties *props)
+{
+ u32 lds_size = 0x10000;
+
+ if (gfxv == 90500 || gfxv == 120500 || gfxv == 120501)
+ lds_size = props->lds_size_in_kb << 10;
+
+ return lds_size;
+}
+
+static u32 get_num_waves(struct kfd_node_properties *props, u32 gfxv, u32 cu_num)
+{
+ u32 wave_num = 0;
+
+ if (gfxv < 100100)
+ wave_num = min(cu_num * 40,
+ props->array_count / props->simd_arrays_per_engine * 512);
+ else if (gfxv < 120500)
+ wave_num = cu_num * 32;
+ else if (gfxv <= 120501)
+ wave_num = cu_num * 64;
+
+ WARN_ON(wave_num == 0);
+
+ return wave_num;
+}
+
#define WG_CONTEXT_DATA_SIZE_PER_CU(gfxv, props) \
- (kfd_get_vgpr_size_per_cu(gfxv) + SGPR_SIZE_PER_CU +\
- (((gfxv) == 90500) ? (props->lds_size_in_kb << 10) : LDS_SIZE_PER_CU) +\
- HWREG_SIZE_PER_CU)
+ (kfd_get_vgpr_size_per_cu(gfxv) + kfd_get_sgpr_size_per_cu(gfxv) +\
+ kfd_get_lds_size_per_cu(gfxv, props) + kfd_get_hwreg_size_per_cu(gfxv))
#define CNTL_STACK_BYTES_PER_WAVE(gfxv) \
((gfxv) >= 100100 ? 12 : 8) /* GFX_VERSION_NAVI10*/
@@ -440,9 +487,7 @@ void kfd_queue_ctx_save_restore_size(struct kfd_topology_device *dev)
return;
cu_num = props->simd_count / props->simd_per_cu / NUM_XCC(dev->gpu->xcc_mask);
- wave_num = (gfxv < 100100) ? /* GFX_VERSION_NAVI10 */
- min(cu_num * 40, props->array_count / props->simd_arrays_per_engine * 512)
- : cu_num * 32;
+ wave_num = get_num_waves(props, gfxv, cu_num);
wg_data_size = ALIGN(cu_num * WG_CONTEXT_DATA_SIZE_PER_CU(gfxv, props), PAGE_SIZE);
ctl_stack_size = wave_num * CNTL_STACK_BYTES_PER_WAVE(gfxv) + 8;
--
2.52.0
^ permalink raw reply related [flat|nested] 20+ messages in thread* [PATCH] drm/amdgpu: Fix CU info calculations for GFX 12.1
2025-12-10 7:13 [PATCH] drm/amdgpu: Flush TLB on all XCCs on GFX 12.1 Alex Deucher
2025-12-10 7:13 ` [PATCH] drm/amdgpu: Add soc v1_0 ih client id table Alex Deucher
2025-12-10 7:13 ` [PATCH] drm/amdkfd: Update CWSR area calculations for GFX 12.1 Alex Deucher
@ 2025-12-10 7:13 ` Alex Deucher
2025-12-10 7:13 ` [PATCH] drm/amdgpu: init RS64_MEC_P2/P3_STACK for gfx12.1 Alex Deucher
` (15 subsequent siblings)
18 siblings, 0 replies; 20+ messages in thread
From: Alex Deucher @ 2025-12-10 7:13 UTC (permalink / raw)
To: amd-gfx; +Cc: Mukul Joshi, Lijo Lazar, Alex Deucher
From: Mukul Joshi <mukul.joshi@amd.com>
This patch fixes the CU info calculations for gfx 12.1.
Signed-off-by: Mukul Joshi <mukul.joshi@amd.com>
Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c | 78 +++++++++-----------------
1 file changed, 27 insertions(+), 51 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
index a8f020a375c92..f5a7ccf9e02d5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
@@ -46,6 +46,7 @@
#include "mes_v12_1.h"
#define GFX12_MEC_HPD_SIZE 2048
+#define NUM_SIMD_PER_CU_GFX12_1 4
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
@@ -69,9 +70,6 @@ static int gfx_v12_1_get_cu_info(struct amdgpu_device *adev,
static uint64_t gfx_v12_1_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v12_1_xcc_select_se_sh(struct amdgpu_device *adev, u32 se_num,
u32 sh_num, u32 instance, int xcc_id);
-static u32 gfx_v12_1_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev,
- int xcc_id);
-
static void gfx_v12_1_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
uint32_t val);
static int gfx_v12_1_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
@@ -3804,7 +3802,7 @@ static void gfx_v12_1_set_mqd_funcs(struct amdgpu_device *adev)
gfx_v12_1_compute_mqd_init;
}
-static void gfx_v12_1_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
+static void gfx_v12_1_set_user_cu_inactive_bitmap_per_sh(struct amdgpu_device *adev,
u32 bitmap, int xcc_id)
{
u32 data;
@@ -3818,39 +3816,20 @@ static void gfx_v12_1_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *
WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGC_USER_SHADER_ARRAY_CONFIG, data);
}
-static u32 gfx_v12_1_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev,
- int xcc_id)
+static u32 gfx_v12_1_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev,
+ int xcc_id)
{
- u32 data, wgp_bitmask;
+ u32 data, mask;
+
data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCC_GC_SHADER_ARRAY_CONFIG);
data |= RREG32_SOC15(GC, GET_INST(GC, xcc_id), regGC_USER_SHADER_ARRAY_CONFIG);
data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
- wgp_bitmask =
- amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);
-
- return (~data) & wgp_bitmask;
-}
-
-static u32 gfx_v12_1_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev,
- int xcc_id)
-{
- u32 wgp_idx, wgp_active_bitmap;
- u32 cu_bitmap_per_wgp, cu_active_bitmap;
-
- wgp_active_bitmap = gfx_v12_1_get_wgp_active_bitmap_per_sh(adev, xcc_id);
- cu_active_bitmap = 0;
+ mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
- for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) {
- /* if there is one WGP enabled, it means 2 CUs will be enabled */
- cu_bitmap_per_wgp = 3 << (2 * wgp_idx);
- if (wgp_active_bitmap & (1 << wgp_idx))
- cu_active_bitmap |= cu_bitmap_per_wgp;
- }
-
- return cu_active_bitmap;
+ return (~data) & mask;
}
static int gfx_v12_1_get_cu_info(struct amdgpu_device *adev,
@@ -3858,12 +3837,23 @@ static int gfx_v12_1_get_cu_info(struct amdgpu_device *adev,
{
int i, j, k, counter, xcc_id, active_cu_number = 0;
u32 mask, bitmap;
- unsigned disable_masks[8 * 2];
+ unsigned int disable_masks[2 * 2];
if (!adev || !cu_info)
return -EINVAL;
- amdgpu_gfx_parse_disable_cu(disable_masks, 8, 2);
+ if (adev->gfx.config.max_shader_engines > 2 ||
+ adev->gfx.config.max_sh_per_se > 2) {
+ dev_err(adev->dev,
+ "Max SE (%d) and Max SA per SE (%d) is greater than expected\n",
+ adev->gfx.config.max_shader_engines,
+ adev->gfx.config.max_sh_per_se);
+ return -EINVAL;
+ }
+
+ amdgpu_gfx_parse_disable_cu(disable_masks,
+ adev->gfx.config.max_shader_engines,
+ adev->gfx.config.max_sh_per_se);
mutex_lock(&adev->grbm_idx_mutex);
for (xcc_id = 0; xcc_id < NUM_XCC(adev->gfx.xcc_mask); xcc_id++) {
@@ -3875,27 +3865,13 @@ static int gfx_v12_1_get_cu_info(struct amdgpu_device *adev,
mask = 1;
counter = 0;
gfx_v12_1_xcc_select_se_sh(adev, i, j, 0xffffffff, xcc_id);
- if (i < 8 && j < 2)
- gfx_v12_1_set_user_wgp_inactive_bitmap_per_sh(
- adev, disable_masks[i * 2 + j], xcc_id);
+ gfx_v12_1_set_user_cu_inactive_bitmap_per_sh(
+ adev,
+ disable_masks[i * adev->gfx.config.max_sh_per_se + j],
+ xcc_id);
bitmap = gfx_v12_1_get_cu_active_bitmap_per_sh(adev, xcc_id);
- /**
- * GFX12 could support more than 4 SEs, while the bitmap
- * in cu_info struct is 4x4 and ioctl interface struct
- * drm_amdgpu_info_device should keep stable.
- * So we use last two columns of bitmap to store cu mask for
- * SEs 4 to 7, the layout of the bitmap is as below:
- * SE0: {SH0,SH1} --> {bitmap[0][0], bitmap[0][1]}
- * SE1: {SH0,SH1} --> {bitmap[1][0], bitmap[1][1]}
- * SE2: {SH0,SH1} --> {bitmap[2][0], bitmap[2][1]}
- * SE3: {SH0,SH1} --> {bitmap[3][0], bitmap[3][1]}
- * SE4: {SH0,SH1} --> {bitmap[0][2], bitmap[0][3]}
- * SE5: {SH0,SH1} --> {bitmap[1][2], bitmap[1][3]}
- * SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]}
- * SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]}
- */
- cu_info->bitmap[0][i % 4][j + (i / 4) * 2] = bitmap;
+ cu_info->bitmap[xcc_id][i][j] = bitmap;
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
if (bitmap & mask)
@@ -3911,7 +3887,7 @@ static int gfx_v12_1_get_cu_info(struct amdgpu_device *adev,
mutex_unlock(&adev->grbm_idx_mutex);
cu_info->number = active_cu_number;
- cu_info->simd_per_cu = NUM_SIMD_PER_CU;
+ cu_info->simd_per_cu = NUM_SIMD_PER_CU_GFX12_1;
cu_info->lds_size = 320;
return 0;
--
2.52.0
^ permalink raw reply related [flat|nested] 20+ messages in thread* [PATCH] drm/amdgpu: init RS64_MEC_P2/P3_STACK for gfx12.1
2025-12-10 7:13 [PATCH] drm/amdgpu: Flush TLB on all XCCs on GFX 12.1 Alex Deucher
` (2 preceding siblings ...)
2025-12-10 7:13 ` [PATCH] drm/amdgpu: Fix CU info " Alex Deucher
@ 2025-12-10 7:13 ` Alex Deucher
2025-12-10 7:14 ` [PATCH] drm/amdgpu: Enable 5-level page table for GFX 12.1.0 Alex Deucher
` (14 subsequent siblings)
18 siblings, 0 replies; 20+ messages in thread
From: Alex Deucher @ 2025-12-10 7:13 UTC (permalink / raw)
To: amd-gfx; +Cc: Feifei Xu, Lijo Lazar, Alex Deucher
From: Feifei Xu <Feifei.Xu@amd.com>
Add GFX12.1 MEC P2/P3 STACK firmware init.
Signed-off-by: Feifei Xu <Feifei.Xu@amd.com>
Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
index f5a7ccf9e02d5..c7f331f74b9b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
@@ -416,6 +416,8 @@ static int gfx_v12_1_init_microcode(struct amdgpu_device *adev)
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC);
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK);
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
err = gfx_v12_1_init_toc_microcode(adev, ucode_prefix);
--
2.52.0
^ permalink raw reply related [flat|nested] 20+ messages in thread* [PATCH] drm/amdgpu: Enable 5-level page table for GFX 12.1.0
2025-12-10 7:13 [PATCH] drm/amdgpu: Flush TLB on all XCCs on GFX 12.1 Alex Deucher
` (3 preceding siblings ...)
2025-12-10 7:13 ` [PATCH] drm/amdgpu: init RS64_MEC_P2/P3_STACK for gfx12.1 Alex Deucher
@ 2025-12-10 7:14 ` Alex Deucher
2025-12-10 7:14 ` [PATCH] drm/amdkfd: Update LDS, Scratch base for 57bit address Alex Deucher
` (13 subsequent siblings)
18 siblings, 0 replies; 20+ messages in thread
From: Alex Deucher @ 2025-12-10 7:14 UTC (permalink / raw)
To: amd-gfx; +Cc: Philip Yang, Felix Kuehling, Alex Deucher
From: Philip Yang <Philip.Yang@amd.com>
GFX 12.1.0 supports 57-bit virtual and 52-bit physical addresses. Set PDE
max_level to 4 and min_vm_size to 128PB to enable 5-level GPU VM page
tables, which are needed for 57-bit virtual addresses.
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Acked-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
index e63ad5f18a8e1..57aabe0dcfdfe 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
@@ -809,11 +809,11 @@ static int gmc_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
for (i = 0; i < hweight32(adev->aid_mask); i++)
set_bit(AMDGPU_MMHUB0(i), adev->vmhubs_mask);
/*
- * To fulfill 4-level page support,
- * vm size is 256TB (48bit), maximum size,
+ * To fulfill 5-level page support,
+ * vm size is 128PetaByte (57bit), maximum size,
* block size 512 (9bit)
*/
- amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
+ amdgpu_vm_adjust_size(adev, 128 * 1024 * 1024, 9, 4, 57);
break;
default:
break;
--
2.52.0
^ permalink raw reply related [flat|nested] 20+ messages in thread* [PATCH] drm/amdkfd: Update LDS, Scratch base for 57bit address
2025-12-10 7:13 [PATCH] drm/amdgpu: Flush TLB on all XCCs on GFX 12.1 Alex Deucher
` (4 preceding siblings ...)
2025-12-10 7:14 ` [PATCH] drm/amdgpu: Enable 5-level page table for GFX 12.1.0 Alex Deucher
@ 2025-12-10 7:14 ` Alex Deucher
2025-12-10 7:14 ` [PATCH] drm/amdgpu: Add pde3 table invalidation request for GFX 12.1.0 Alex Deucher
` (12 subsequent siblings)
18 siblings, 0 replies; 20+ messages in thread
From: Alex Deucher @ 2025-12-10 7:14 UTC (permalink / raw)
To: amd-gfx; +Cc: Philip Yang, Felix Kuehling, Alex Deucher
From: Philip Yang <Philip.Yang@amd.com>
For 5-level page tables, update the compute vmid sh_mem_base LDS aperture
and Scratch aperture base addresses to above 57-bit, using the same setting
as the gfx vmid, so that the duplicate macros can be removed.
Update the queue pdd lds_base and scratch_base to the same values as the
sh_mem_base setting. The get process apertures call then returns the
correct values for applications to access LDS and Scratch memory with
57-bit addresses and 5-level page tables. This may be passed to MES in the
future when mapping queues.
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Acked-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c | 14 ++++++--------
drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c | 10 ++++++++--
2 files changed, 14 insertions(+), 10 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
index c7f331f74b9b8..61ffba9a252bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
@@ -1362,9 +1362,6 @@ static void gfx_v12_1_setup_rb(struct amdgpu_device *adev)
adev->gfx.config.num_rbs = hweight32(active_rb_bitmap);
}
-#define LDS_APP_BASE 0x2000
-#define SCRATCH_APP_BASE 0x4
-
static void gfx_v12_1_xcc_init_compute_vmid(struct amdgpu_device *adev,
int xcc_id)
{
@@ -1374,12 +1371,13 @@ static void gfx_v12_1_xcc_init_compute_vmid(struct amdgpu_device *adev,
/*
* Configure apertures:
- * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
- * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
- * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
+ * LDS: 0x20000000'00000000 - 0x20000001'00000000 (4GB)
+ * Scratch: 0x10000000'00000000 - 0x10000001'00000000 (4GB)
*/
- sh_mem_bases = (LDS_APP_BASE << SH_MEM_BASES__SHARED_BASE__SHIFT) |
- (SCRATCH_APP_BASE << SH_MEM_BASES__PRIVATE_BASE__SHIFT);
+ sh_mem_bases = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
+ (adev->gmc.private_aperture_start >> 58));
+ sh_mem_bases = REG_SET_FIELD(sh_mem_bases, SH_MEM_BASES, SHARED_BASE,
+ (adev->gmc.shared_aperture_start >> 48));
mutex_lock(&adev->srbm_mutex);
for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
index 1d170dc50df33..557a5ade329ac 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
@@ -342,14 +342,20 @@ static void kfd_init_apertures_vi(struct kfd_process_device *pdd, uint8_t id)
static void kfd_init_apertures_v9(struct kfd_process_device *pdd, uint8_t id)
{
- pdd->lds_base = MAKE_LDS_APP_BASE_V9();
+ if (pdd->dev->adev->vm_manager.root_level == AMDGPU_VM_PDB3)
+ pdd->lds_base = pdd->dev->adev->gmc.shared_aperture_start;
+ else
+ pdd->lds_base = MAKE_LDS_APP_BASE_V9();
pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);
pdd->gpuvm_base = AMDGPU_VA_RESERVED_BOTTOM;
pdd->gpuvm_limit =
pdd->dev->kfd->shared_resources.gpuvm_size - 1;
- pdd->scratch_base = MAKE_SCRATCH_APP_BASE_V9();
+ if (pdd->dev->adev->vm_manager.root_level == AMDGPU_VM_PDB3)
+ pdd->scratch_base = pdd->dev->adev->gmc.private_aperture_start;
+ else
+ pdd->scratch_base = MAKE_SCRATCH_APP_BASE_V9();
pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
/*
--
2.52.0
^ permalink raw reply related [flat|nested] 20+ messages in thread* [PATCH] drm/amdgpu: Add pde3 table invalidation request for GFX 12.1.0
2025-12-10 7:13 [PATCH] drm/amdgpu: Flush TLB on all XCCs on GFX 12.1 Alex Deucher
` (5 preceding siblings ...)
2025-12-10 7:14 ` [PATCH] drm/amdkfd: Update LDS, Scratch base for 57bit address Alex Deucher
@ 2025-12-10 7:14 ` Alex Deucher
2025-12-10 7:14 ` [PATCH] drm/amdgpu: Support 57bit fault address " Alex Deucher
` (11 subsequent siblings)
18 siblings, 0 replies; 20+ messages in thread
From: Alex Deucher @ 2025-12-10 7:14 UTC (permalink / raw)
To: amd-gfx; +Cc: Philip Yang, Christian König, Felix Kuehling, Alex Deucher
From: Philip Yang <Philip.Yang@amd.com>
Set the pde3 invalidation request bit during TLB flush to support page
tables of up to 5 levels.
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Acked-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
drivers/gpu/drm/amd/amdgpu/gfxhub_v12_1.c | 2 ++
drivers/gpu/drm/amd/amdgpu/mmhub_v4_2_0.c | 1 +
2 files changed, 3 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_1.c
index ab002f327f763..8f1819d15bc61 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_1.c
@@ -668,6 +668,8 @@ static uint32_t gfxhub_v12_1_get_invalidate_req(unsigned int vmid,
INVALIDATE_L2_PDE1, 1);
req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ,
INVALIDATE_L2_PDE2, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ,
+ INVALIDATE_L2_PDE3, 1);
req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ,
INVALIDATE_L1_PTES, 1);
req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ,
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_2_0.c
index 75f7df7db5b66..7e917eb47a8c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_2_0.c
@@ -658,6 +658,7 @@ static uint32_t mmhub_v4_2_0_get_invalidate_req(unsigned int vmid,
req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE3, 1);
req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ,
CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
--
2.52.0
^ permalink raw reply related [flat|nested] 20+ messages in thread* [PATCH] drm/amdgpu: Support 57bit fault address for GFX 12.1.0
2025-12-10 7:13 [PATCH] drm/amdgpu: Flush TLB on all XCCs on GFX 12.1 Alex Deucher
` (6 preceding siblings ...)
2025-12-10 7:14 ` [PATCH] drm/amdgpu: Add pde3 table invalidation request for GFX 12.1.0 Alex Deucher
@ 2025-12-10 7:14 ` Alex Deucher
2025-12-10 7:14 ` [PATCH] drm/amdgpu: Fix CP_MEC_MDBASE in multi-xcc for gfx v12_1 Alex Deucher
` (10 subsequent siblings)
18 siblings, 0 replies; 20+ messages in thread
From: Alex Deucher @ 2025-12-10 7:14 UTC (permalink / raw)
To: amd-gfx; +Cc: Philip Yang, Felix Kuehling, Alex Deucher
From: Philip Yang <Philip.Yang@amd.com>
The gmc fault virtual address is up to 57 bits wide with 5-level page
tables; this also works with 48-bit virtual addresses for 4-level page tables.
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Acked-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
drivers/gpu/drm/amd/amdgpu/gmc_v12_1.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_1.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_1.c
index 19cd38ce57c94..28b3732df016a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_1.c
@@ -117,7 +117,7 @@ static int gmc_v12_1_process_interrupt(struct amdgpu_device *adev,
node_id = entry->node_id;
addr = (u64)entry->src_data[0] << 12;
- addr |= ((u64)entry->src_data[1] & 0xf) << 44;
+ addr |= ((u64)entry->src_data[1] & 0x1fff) << 44;
if (entry->src_id == UTCL2_1_0__SRCID__RETRY) {
retry_fault = true;
--
2.52.0
^ permalink raw reply related [flat|nested] 20+ messages in thread* [PATCH] drm/amdgpu: Fix CP_MEC_MDBASE in multi-xcc for gfx v12_1
2025-12-10 7:13 [PATCH] drm/amdgpu: Flush TLB on all XCCs on GFX 12.1 Alex Deucher
` (7 preceding siblings ...)
2025-12-10 7:14 ` [PATCH] drm/amdgpu: Support 57bit fault address " Alex Deucher
@ 2025-12-10 7:14 ` Alex Deucher
2025-12-10 7:14 ` [PATCH] drm/amdgpu: Correct xcc_id input to GET_INST from physical to logic Alex Deucher
` (9 subsequent siblings)
18 siblings, 0 replies; 20+ messages in thread
From: Alex Deucher @ 2025-12-10 7:14 UTC (permalink / raw)
To: amd-gfx; +Cc: Michael Chen, Harish Kasiviswanathan, Shaoyun.liu, Alex Deucher
From: Michael Chen <michael.chen@amd.com>
Allocate memory for the MEC FW data and program the CP_MEC_MDBASE
registers separately for each XCC.
Signed-off-by: Michael Chen <michael.chen@amd.com>
Acked-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Reviewed-by: Shaoyun.liu <Shaoyun.liu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c | 192 +++++++++++++------------
1 file changed, 98 insertions(+), 94 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
index 61ffba9a252bd..347912596a1b0 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
@@ -1887,20 +1887,18 @@ static void gfx_v12_1_xcc_cp_compute_enable(struct amdgpu_device *adev,
}
static int gfx_v12_1_xcc_cp_compute_load_microcode_rs64(struct amdgpu_device *adev,
- int xcc_id)
+ uint16_t xcc_mask)
{
const struct gfx_firmware_header_v2_0 *mec_hdr;
const __le32 *fw_ucode, *fw_data;
u32 tmp, fw_ucode_size, fw_data_size;
u32 i, usec_timeout = 50000; /* Wait for 50 ms */
u32 *fw_ucode_ptr, *fw_data_ptr;
- int r;
+ int r, xcc_id;
if (!adev->gfx.mec_fw)
return -EINVAL;
- gfx_v12_1_xcc_cp_compute_enable(adev, false, xcc_id);
-
mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data;
amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
@@ -1925,7 +1923,7 @@ static int gfx_v12_1_xcc_cp_compute_load_microcode_rs64(struct amdgpu_device *ad
r = amdgpu_bo_create_reserved(adev,
ALIGN(fw_data_size, 64 * 1024) *
- adev->gfx.mec.num_pipe_per_mec,
+ adev->gfx.mec.num_pipe_per_mec * NUM_XCC(xcc_mask),
64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
&adev->gfx.mec.mec_fw_data_obj,
&adev->gfx.mec.mec_fw_data_gpu_addr,
@@ -1937,8 +1935,12 @@ static int gfx_v12_1_xcc_cp_compute_load_microcode_rs64(struct amdgpu_device *ad
}
memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size);
- for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
- memcpy(fw_data_ptr + i * ALIGN(fw_data_size, 64 * 1024) / 4, fw_data, fw_data_size);
+ for_each_inst(xcc_id, xcc_mask) {
+ for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
+ u32 offset = (xcc_id * adev->gfx.mec.num_pipe_per_mec + i) *
+ ALIGN(fw_data_size, 64 * 1024) / 4;
+ memcpy(fw_data_ptr + offset, fw_data, fw_data_size);
+ }
}
amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
@@ -1946,75 +1948,81 @@ static int gfx_v12_1_xcc_cp_compute_load_microcode_rs64(struct amdgpu_device *ad
amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj);
- tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_CNTL);
- tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
- tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
- tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
- WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_CNTL, tmp);
-
- tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DC_BASE_CNTL);
- tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
- tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
- WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DC_BASE_CNTL, tmp);
-
- mutex_lock(&adev->srbm_mutex);
- for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
- soc_v1_0_grbm_select(adev, 1, i, 0, 0, GET_INST(GC, xcc_id));
+ for_each_inst(xcc_id, xcc_mask) {
+ gfx_v12_1_xcc_cp_compute_enable(adev, false, xcc_id);
- WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_MDBASE_LO,
- lower_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr +
- i * ALIGN(fw_data_size, 64 * 1024)));
- WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_MDBASE_HI,
- upper_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr +
- i * ALIGN(fw_data_size, 64 * 1024)));
+ tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_CNTL, tmp);
- WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_LO,
- lower_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
- WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_HI,
- upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
- }
- mutex_unlock(&adev->srbm_mutex);
- soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, 0));
+ tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DC_BASE_CNTL, tmp);
- /* Trigger an invalidation of the L1 instruction caches */
- tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DC_OP_CNTL);
- tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
- WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DC_OP_CNTL, tmp);
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
+ soc_v1_0_grbm_select(adev, 1, i, 0, 0, GET_INST(GC, xcc_id));
+
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_MDBASE_LO,
+ lower_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr +
+ (xcc_id * adev->gfx.mec.num_pipe_per_mec + i) *
+ ALIGN(fw_data_size, 64 * 1024)));
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_MDBASE_HI,
+ upper_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr +
+ (xcc_id * adev->gfx.mec.num_pipe_per_mec + i) *
+ ALIGN(fw_data_size, 64 * 1024)));
+
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_LO,
+ lower_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_HI,
+ upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
+ }
+ mutex_unlock(&adev->srbm_mutex);
+ soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, 0));
- /* Wait for invalidation complete */
- for (i = 0; i < usec_timeout; i++) {
+ /* Trigger an invalidation of the L1 instruction caches */
tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DC_OP_CNTL);
- if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
- INVALIDATE_DCACHE_COMPLETE))
- break;
- udelay(1);
- }
-
- if (i >= usec_timeout) {
- dev_err(adev->dev, "failed to invalidate instruction cache\n");
- return -EINVAL;
- }
+ tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
+ INVALIDATE_DCACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
- /* Trigger an invalidation of the L1 instruction caches */
- tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_OP_CNTL);
- tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
- WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_OP_CNTL, tmp);
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
- /* Wait for invalidation complete */
- for (i = 0; i < usec_timeout; i++) {
+ /* Trigger an invalidation of the L1 instruction caches */
tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_OP_CNTL);
- if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
- INVALIDATE_CACHE_COMPLETE))
- break;
- udelay(1);
- }
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
- if (i >= usec_timeout) {
- dev_err(adev->dev, "failed to invalidate instruction cache\n");
- return -EINVAL;
- }
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
- gfx_v12_1_xcc_set_mec_ucode_start_addr(adev, xcc_id);
+ gfx_v12_1_xcc_set_mec_ucode_start_addr(adev, xcc_id);
+ }
return 0;
}
@@ -2411,42 +2419,43 @@ static int gfx_v12_1_xcc_kcq_resume(struct amdgpu_device *adev,
return r;
}
-static int gfx_v12_1_xcc_cp_resume(struct amdgpu_device *adev,
- int xcc_id)
+static int gfx_v12_1_xcc_cp_resume(struct amdgpu_device *adev, uint16_t xcc_mask)
{
- int r, i;
+ int r, i, xcc_id;
struct amdgpu_ring *ring;
- if (!(adev->flags & AMD_IS_APU))
- gfx_v12_1_xcc_enable_gui_idle_interrupt(adev, false, xcc_id);
-
if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
/* legacy firmware loading */
- r = gfx_v12_1_xcc_cp_compute_load_microcode_rs64(adev, xcc_id);
+ r = gfx_v12_1_xcc_cp_compute_load_microcode_rs64(adev, xcc_mask);
if (r)
return r;
}
- gfx_v12_1_xcc_cp_set_doorbell_range(adev, xcc_id);
+ for_each_inst(xcc_id, xcc_mask) {
+ if (!(adev->flags & AMD_IS_APU))
+ gfx_v12_1_xcc_enable_gui_idle_interrupt(adev, false, xcc_id);
- gfx_v12_1_xcc_cp_compute_enable(adev, true, xcc_id);
+ gfx_v12_1_xcc_cp_set_doorbell_range(adev, xcc_id);
- if (adev->enable_mes_kiq && adev->mes.kiq_hw_init)
- r = amdgpu_mes_kiq_hw_init(adev, xcc_id);
- else
- r = gfx_v12_1_xcc_kiq_resume(adev, xcc_id);
- if (r)
- return r;
+ gfx_v12_1_xcc_cp_compute_enable(adev, true, xcc_id);
- r = gfx_v12_1_xcc_kcq_resume(adev, xcc_id);
- if (r)
- return r;
+ if (adev->enable_mes_kiq && adev->mes.kiq_hw_init)
+ r = amdgpu_mes_kiq_hw_init(adev, xcc_id);
+ else
+ r = gfx_v12_1_xcc_kiq_resume(adev, xcc_id);
+ if (r)
+ return r;
- for (i = 0; i < adev->gfx.num_compute_rings; i++) {
- ring = &adev->gfx.compute_ring[i + xcc_id * adev->gfx.num_compute_rings];
- r = amdgpu_ring_test_helper(ring);
+ r = gfx_v12_1_xcc_kcq_resume(adev, xcc_id);
if (r)
return r;
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i + xcc_id * adev->gfx.num_compute_rings];
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+ }
}
return 0;
@@ -3923,14 +3932,9 @@ static int gfx_v12_1_xcp_resume(void *handle, uint32_t inst_mask)
}
}
- tmp_mask = inst_mask;
- for_each_inst(i, tmp_mask) {
- r = gfx_v12_1_xcc_cp_resume(adev, i);
- if (r)
- return r;
- }
+ r = gfx_v12_1_xcc_cp_resume(adev, inst_mask);
- return 0;
+ return r;
}
static int gfx_v12_1_xcp_suspend(void *handle, uint32_t inst_mask)
--
2.52.0
^ permalink raw reply related [flat|nested] 20+ messages in thread* [PATCH] drm/amdgpu: Correct xcc_id input to GET_INST from physical to logic
2025-12-10 7:13 [PATCH] drm/amdgpu: Flush TLB on all XCCs on GFX 12.1 Alex Deucher
` (8 preceding siblings ...)
2025-12-10 7:14 ` [PATCH] drm/amdgpu: Fix CP_MEC_MDBASE in multi-xcc for gfx v12_1 Alex Deucher
@ 2025-12-10 7:14 ` Alex Deucher
2025-12-10 7:14 ` [PATCH] drm/amdgpu: use physical xcc id to get rrmt Alex Deucher
` (8 subsequent siblings)
18 siblings, 0 replies; 20+ messages in thread
From: Alex Deucher @ 2025-12-10 7:14 UTC (permalink / raw)
To: amd-gfx; +Cc: Likun Gao, Hawking Zhang, Alex Deucher
From: Likun Gao <Likun.Gao@amd.com>
Correct the xcc_id passed to GET_INST from physical to logical for
gfx_v12_1.
Signed-off-by: Likun Gao <Likun.Gao@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c | 6 +--
drivers/gpu/drm/amd/amdgpu/gfxhub_v12_1.c | 53 +++++++++--------------
drivers/gpu/drm/amd/amdgpu/gmc_v12_1.c | 2 +-
3 files changed, 24 insertions(+), 37 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
index 347912596a1b0..2f94c44bd6e11 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
@@ -1935,7 +1935,7 @@ static int gfx_v12_1_xcc_cp_compute_load_microcode_rs64(struct amdgpu_device *ad
}
memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size);
- for_each_inst(xcc_id, xcc_mask) {
+ for (xcc_id = 0; xcc_id < NUM_XCC(xcc_mask); xcc_id++) {
for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
u32 offset = (xcc_id * adev->gfx.mec.num_pipe_per_mec + i) *
ALIGN(fw_data_size, 64 * 1024) / 4;
@@ -1948,7 +1948,7 @@ static int gfx_v12_1_xcc_cp_compute_load_microcode_rs64(struct amdgpu_device *ad
amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj);
- for_each_inst(xcc_id, xcc_mask) {
+ for (xcc_id = 0; xcc_id < NUM_XCC(xcc_mask); xcc_id++) {
gfx_v12_1_xcc_cp_compute_enable(adev, false, xcc_id);
tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_CNTL);
@@ -2431,7 +2431,7 @@ static int gfx_v12_1_xcc_cp_resume(struct amdgpu_device *adev, uint16_t xcc_mask
return r;
}
- for_each_inst(xcc_id, xcc_mask) {
+ for (xcc_id = 0; xcc_id < NUM_XCC(xcc_mask); xcc_id++) {
if (!(adev->flags & AMD_IS_APU))
gfx_v12_1_xcc_enable_gui_idle_interrupt(adev, false, xcc_id);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_1.c
index 8f1819d15bc61..662542b9cc362 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_1.c
@@ -65,7 +65,7 @@ static void gfxhub_v12_1_xcc_setup_vm_pt_regs(struct amdgpu_device *adev,
struct amdgpu_vmhub *hub;
int i;
- for_each_inst(i, xcc_mask) {
+ for (i = 0; i < NUM_XCC(xcc_mask); i++) {
hub = &adev->vmhub[AMDGPU_GFXHUB(i)];
WREG32_SOC15_OFFSET(GC, GET_INST(GC, i),
regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
@@ -83,10 +83,8 @@ static void gfxhub_v12_1_setup_vm_pt_regs(struct amdgpu_device *adev,
uint32_t vmid,
uint64_t page_table_base)
{
- uint32_t xcc_mask;
-
- xcc_mask = GENMASK(NUM_XCC(adev->gfx.xcc_mask) - 1, 0);
- gfxhub_v12_1_xcc_setup_vm_pt_regs(adev, vmid, page_table_base, xcc_mask);
+ gfxhub_v12_1_xcc_setup_vm_pt_regs(adev, vmid, page_table_base,
+ adev->gfx.xcc_mask);
}
static void gfxhub_v12_1_xcc_init_gart_aperture_regs(struct amdgpu_device *adev,
@@ -105,7 +103,7 @@ static void gfxhub_v12_1_xcc_init_gart_aperture_regs(struct amdgpu_device *adev,
/* If use GART for FB translation, vmid0 page table covers both
* vram and system memory (gart)
*/
- for_each_inst(i, xcc_mask) {
+ for (i = 0; i < NUM_XCC(xcc_mask); i++) {
if (adev->gmc.pdb0_bo) {
WREG32_SOC15(GC, GET_INST(GC, i),
regGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
@@ -145,7 +143,7 @@ static void gfxhub_v12_1_xcc_init_system_aperture_regs(struct amdgpu_device *ade
uint32_t tmp;
int i;
- for_each_inst(i, xcc_mask) {
+ for (i = 0; i < NUM_XCC(xcc_mask); i++) {
/* Program the AGP BAR */
WREG32_SOC15_RLC(GC, GET_INST(GC, i),
regGCMC_VM_AGP_BASE_LO32, 0);
@@ -247,7 +245,7 @@ static void gfxhub_v12_1_xcc_init_tlb_regs(struct amdgpu_device *adev,
uint32_t tmp;
int i;
- for_each_inst(i, xcc_mask) {
+ for (i = 0; i < NUM_XCC(xcc_mask); i++) {
/* Setup TLB control */
tmp = RREG32_SOC15(GC, GET_INST(GC, i),
regGCMC_VM_MX_L1_TLB_CNTL);
@@ -282,7 +280,7 @@ static void gfxhub_v12_1_xcc_init_cache_regs(struct amdgpu_device *adev,
uint32_t tmp;
int i;
- for_each_inst(i, xcc_mask) {
+ for (i = 0; i < NUM_XCC(xcc_mask); i++) {
/* Setup L2 cache */
tmp = RREG32_SOC15(GC, GET_INST(GC, i), regGCVM_L2_CNTL);
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL,
@@ -343,7 +341,7 @@ static void gfxhub_v12_1_xcc_enable_system_domain(struct amdgpu_device *adev,
uint32_t tmp;
int i;
- for_each_inst(i, xcc_mask) {
+ for (i = 0; i < NUM_XCC(xcc_mask); i++) {
tmp = RREG32_SOC15(GC, GET_INST(GC, i),
regGCVM_CONTEXT0_CNTL);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL,
@@ -366,7 +364,7 @@ static void gfxhub_v12_1_xcc_disable_identity_aperture(struct amdgpu_device *ade
{
int i;
- for_each_inst(i, xcc_mask) {
+ for (i = 0; i < NUM_XCC(xcc_mask); i++) {
WREG32_SOC15(GC, GET_INST(GC, i),
regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
0XFFFFFFFF);
@@ -402,7 +400,7 @@ static void gfxhub_v12_1_xcc_setup_vmid_config(struct amdgpu_device *adev,
block_size = adev->vm_manager.block_size;
block_size -= 9;
- for_each_inst(j, xcc_mask) {
+ for (j = 0; j < NUM_XCC(xcc_mask); j++) {
hub = &adev->vmhub[AMDGPU_GFXHUB(j)];
for (i = 0; i <= 14; i++) {
tmp = RREG32_SOC15_OFFSET(GC, GET_INST(GC, j),
@@ -460,7 +458,7 @@ static void gfxhub_v12_1_xcc_program_invalidation(struct amdgpu_device *adev,
struct amdgpu_vmhub *hub;
unsigned int i, j;
- for_each_inst(j, xcc_mask) {
+ for (j = 0; j < NUM_XCC(xcc_mask); j++) {
hub = &adev->vmhub[AMDGPU_GFXHUB(j)];
for (i = 0 ; i < 18; ++i) {
@@ -483,7 +481,7 @@ static int gfxhub_v12_1_xcc_gart_enable(struct amdgpu_device *adev,
/* GCMC_VM_FB_LOCATION_BASE/TOP are VF copy registers
* VBIO post does not program them at boot up phase
* Need driver to program them from guest side */
- for_each_inst(i, xcc_mask) {
+ for (i = 0; i < NUM_XCC(xcc_mask); i++) {
WREG32_SOC15(GC, GET_INST(GC, i),
regGCMC_VM_FB_LOCATION_BASE_LO32,
lower_32_bits(adev->gmc.vram_start >> 24));
@@ -516,10 +514,8 @@ static int gfxhub_v12_1_xcc_gart_enable(struct amdgpu_device *adev,
static int gfxhub_v12_1_gart_enable(struct amdgpu_device *adev)
{
- uint32_t xcc_mask;
-
- xcc_mask = GENMASK(NUM_XCC(adev->gfx.xcc_mask) - 1, 0);
- return gfxhub_v12_1_xcc_gart_enable(adev, xcc_mask);
+ return gfxhub_v12_1_xcc_gart_enable(adev,
+ adev->gfx.xcc_mask);
}
static void gfxhub_v12_1_xcc_gart_disable(struct amdgpu_device *adev,
@@ -529,7 +525,7 @@ static void gfxhub_v12_1_xcc_gart_disable(struct amdgpu_device *adev,
u32 tmp;
u32 i, j;
- for_each_inst(j, xcc_mask) {
+ for (j = 0; j < NUM_XCC(xcc_mask); j++) {
hub = &adev->vmhub[AMDGPU_GFXHUB(j)];
/* Disable all tables */
for (i = 0; i < 16; i++)
@@ -559,10 +555,7 @@ static void gfxhub_v12_1_xcc_gart_disable(struct amdgpu_device *adev,
static void gfxhub_v12_1_gart_disable(struct amdgpu_device *adev)
{
- uint32_t xcc_mask;
-
- xcc_mask = GENMASK(NUM_XCC(adev->gfx.xcc_mask) - 1, 0);
- gfxhub_v12_1_xcc_gart_disable(adev, xcc_mask);
+ gfxhub_v12_1_xcc_gart_disable(adev, adev->gfx.xcc_mask);
}
static void gfxhub_v12_1_xcc_set_fault_enable_default(struct amdgpu_device *adev,
@@ -571,7 +564,7 @@ static void gfxhub_v12_1_xcc_set_fault_enable_default(struct amdgpu_device *adev
u32 tmp;
int i;
- for_each_inst(i, xcc_mask) {
+ for (i = 0; i < NUM_XCC(xcc_mask); i++) {
tmp = RREG32_SOC15(GC, GET_INST(GC, i),
regGCVM_L2_PROTECTION_FAULT_CNTL_LO32);
tmp = REG_SET_FIELD(tmp,
@@ -644,10 +637,7 @@ static void gfxhub_v12_1_xcc_set_fault_enable_default(struct amdgpu_device *adev
static void gfxhub_v12_1_set_fault_enable_default(struct amdgpu_device *adev,
bool value)
{
- uint32_t xcc_mask;
-
- xcc_mask = GENMASK(NUM_XCC(adev->gfx.xcc_mask) - 1, 0);
- gfxhub_v12_1_xcc_set_fault_enable_default(adev, value, xcc_mask);
+ gfxhub_v12_1_xcc_set_fault_enable_default(adev, value, adev->gfx.xcc_mask);
}
static uint32_t gfxhub_v12_1_get_invalidate_req(unsigned int vmid,
@@ -744,7 +734,7 @@ static void gfxhub_v12_1_xcc_init(struct amdgpu_device *adev, uint32_t xcc_mask)
struct amdgpu_vmhub *hub;
int i;
- for_each_inst(i, xcc_mask) {
+ for (i = 0; i < NUM_XCC(xcc_mask); i++) {
hub = &adev->vmhub[AMDGPU_GFXHUB(i)];
hub->ctx0_ptb_addr_lo32 =
@@ -800,10 +790,7 @@ static void gfxhub_v12_1_xcc_init(struct amdgpu_device *adev, uint32_t xcc_mask)
static void gfxhub_v12_1_init(struct amdgpu_device *adev)
{
- uint32_t xcc_mask;
-
- xcc_mask = GENMASK(NUM_XCC(adev->gfx.xcc_mask) - 1, 0);
- gfxhub_v12_1_xcc_init(adev, xcc_mask);
+ gfxhub_v12_1_xcc_init(adev, adev->gfx.xcc_mask);
}
static int gfxhub_v12_1_get_xgmi_info(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_1.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_1.c
index 28b3732df016a..061d1be723408 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_1.c
@@ -319,7 +319,7 @@ static void gmc_v12_1_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
u32 ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req,
- 1 << vmid, GET_INST(GC, 0));
+ 1 << vmid, 0);
return;
}
--
2.52.0
^ permalink raw reply related [flat|nested] 20+ messages in thread* [PATCH] drm/amdgpu: use physical xcc id to get rrmt
2025-12-10 7:13 [PATCH] drm/amdgpu: Flush TLB on all XCCs on GFX 12.1 Alex Deucher
` (9 preceding siblings ...)
2025-12-10 7:14 ` [PATCH] drm/amdgpu: Correct xcc_id input to GET_INST from physical to logic Alex Deucher
@ 2025-12-10 7:14 ` Alex Deucher
2025-12-10 7:14 ` [PATCH] drm/amdgpu: Correct inst_id input from physical to logic Alex Deucher
` (7 subsequent siblings)
18 siblings, 0 replies; 20+ messages in thread
From: Alex Deucher @ 2025-12-10 7:14 UTC (permalink / raw)
To: amd-gfx; +Cc: Likun Gao, Hawking Zhang, Alex Deucher
From: Likun Gao <Likun.Gao@amd.com>
Use physical xcc_id to get rrmt on misc_op for mes v12_1.
Signed-off-by: Likun Gao <Likun.Gao@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
drivers/gpu/drm/amd/amdgpu/mes_v12_1.c | 26 ++++++++++++++++----------
1 file changed, 16 insertions(+), 10 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c
index 913ce414ca9bc..6a454d5eb1a3b 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c
@@ -523,6 +523,7 @@ static void mes_v12_1_get_rrmt(uint32_t reg, uint32_t xcc_id,
static int mes_v12_1_misc_op(struct amdgpu_mes *mes,
struct mes_misc_op_input *input)
{
+ struct amdgpu_device *adev = mes->adev;
union MESAPI__MISC misc_pkt;
int pipe;
@@ -542,15 +543,17 @@ static int mes_v12_1_misc_op(struct amdgpu_mes *mes,
misc_pkt.opcode = MESAPI_MISC__READ_REG;
misc_pkt.read_reg.reg_offset = input->read_reg.reg_offset;
misc_pkt.read_reg.buffer_addr = input->read_reg.buffer_addr;
- mes_v12_1_get_rrmt(input->read_reg.reg_offset, input->xcc_id,
- &misc_pkt.read_reg.rrmt_opt);
+ mes_v12_1_get_rrmt(input->read_reg.reg_offset,
+ GET_INST(GC, input->xcc_id),
+ &misc_pkt.read_reg.rrmt_opt);
break;
case MES_MISC_OP_WRITE_REG:
misc_pkt.opcode = MESAPI_MISC__WRITE_REG;
misc_pkt.write_reg.reg_offset = input->write_reg.reg_offset;
misc_pkt.write_reg.reg_value = input->write_reg.reg_value;
- mes_v12_1_get_rrmt(input->write_reg.reg_offset, input->xcc_id,
- &misc_pkt.write_reg.rrmt_opt);
+ mes_v12_1_get_rrmt(input->write_reg.reg_offset,
+ GET_INST(GC, input->xcc_id),
+ &misc_pkt.write_reg.rrmt_opt);
break;
case MES_MISC_OP_WRM_REG_WAIT:
misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM;
@@ -559,8 +562,9 @@ static int mes_v12_1_misc_op(struct amdgpu_mes *mes,
misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask;
misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0;
misc_pkt.wait_reg_mem.reg_offset2 = 0;
- mes_v12_1_get_rrmt(input->wrm_reg.reg0, input->xcc_id,
- &misc_pkt.wait_reg_mem.rrmt_opt1);
+ mes_v12_1_get_rrmt(input->wrm_reg.reg0,
+ GET_INST(GC, input->xcc_id),
+ &misc_pkt.wait_reg_mem.rrmt_opt1);
break;
case MES_MISC_OP_WRM_REG_WR_WAIT:
misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM;
@@ -569,10 +573,12 @@ static int mes_v12_1_misc_op(struct amdgpu_mes *mes,
misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask;
misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0;
misc_pkt.wait_reg_mem.reg_offset2 = input->wrm_reg.reg1;
- mes_v12_1_get_rrmt(input->wrm_reg.reg0, input->xcc_id,
- &misc_pkt.wait_reg_mem.rrmt_opt1);
- mes_v12_1_get_rrmt(input->wrm_reg.reg1, input->xcc_id,
- &misc_pkt.wait_reg_mem.rrmt_opt2);
+ mes_v12_1_get_rrmt(input->wrm_reg.reg0,
+ GET_INST(GC, input->xcc_id),
+ &misc_pkt.wait_reg_mem.rrmt_opt1);
+ mes_v12_1_get_rrmt(input->wrm_reg.reg1,
+ GET_INST(GC, input->xcc_id),
+ &misc_pkt.wait_reg_mem.rrmt_opt2);
break;
case MES_MISC_OP_SET_SHADER_DEBUGGER:
pipe = AMDGPU_MES_SCHED_PIPE;
--
2.52.0
^ permalink raw reply related [flat|nested] 20+ messages in thread* [PATCH] drm/amdgpu: Correct inst_id input from physical to logic
2025-12-10 7:13 [PATCH] drm/amdgpu: Flush TLB on all XCCs on GFX 12.1 Alex Deucher
` (10 preceding siblings ...)
2025-12-10 7:14 ` [PATCH] drm/amdgpu: use physical xcc id to get rrmt Alex Deucher
@ 2025-12-10 7:14 ` Alex Deucher
2025-12-10 7:14 ` [PATCH] drm/amdgpu: support xcc harvest for ih translate Alex Deucher
` (6 subsequent siblings)
18 siblings, 0 replies; 20+ messages in thread
From: Alex Deucher @ 2025-12-10 7:14 UTC (permalink / raw)
To: amd-gfx; +Cc: Likun Gao, Hawking Zhang, Alex Deucher
From: Likun Gao <Likun.Gao@amd.com>
Correct the inst_id input from physical to logical for sdma v7_1.
V2: Show the real instance number on the logical xcc.
Signed-off-by: Likun Gao <Likun.Gao@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c | 50 +++++++++++++-------------
1 file changed, 25 insertions(+), 25 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c
index 446b7527f5c62..e3963675bfac0 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c
@@ -369,7 +369,7 @@ static void sdma_v7_1_inst_gfx_stop(struct amdgpu_device *adev,
u32 rb_cntl, ib_cntl;
int i;
- for_each_inst(i, inst_mask) {
+ for (i = 0; i < NUM_XCC(inst_mask); i++) {
rb_cntl = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_CNTL));
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, RB_ENABLE, 0);
WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_CNTL), rb_cntl);
@@ -436,7 +436,7 @@ static void sdma_v7_1_inst_enable(struct amdgpu_device *adev,
if (amdgpu_sriov_vf(adev))
return;
- for_each_inst(i, inst_mask) {
+ for (i = 0; i < NUM_XCC(inst_mask); i++) {
mcu_cntl = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_MCU_CNTL));
mcu_cntl = REG_SET_FIELD(mcu_cntl, SDMA0_SDMA_MCU_CNTL, HALT, enable ? 0 : 1);
WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_MCU_CNTL), mcu_cntl);
@@ -617,7 +617,7 @@ static int sdma_v7_1_inst_gfx_resume(struct amdgpu_device *adev,
{
int i, r;
- for_each_inst(i, inst_mask) {
+ for (i = 0; i < NUM_XCC(inst_mask); i++) {
r = sdma_v7_1_gfx_resume_instance(adev, i, false);
if (r)
return r;
@@ -647,7 +647,7 @@ static void sdma_v7_1_inst_free_ucode_buffer(struct amdgpu_device *adev,
{
int i;
- for_each_inst(i, inst_mask) {
+ for (i = 0; i < NUM_XCC(inst_mask); i++) {
amdgpu_bo_free_kernel(&adev->sdma.instance[i].sdma_fw_obj,
&adev->sdma.instance[i].sdma_fw_gpu_addr,
(void **)&adev->sdma.instance[i].sdma_fw_ptr);
@@ -686,7 +686,7 @@ static int sdma_v7_1_inst_load_microcode(struct amdgpu_device *adev,
le32_to_cpu(hdr->ucode_offset_bytes));
fw_size = le32_to_cpu(hdr->ucode_size_bytes);
- for_each_inst(i, inst_mask) {
+ for (i = 0; i < NUM_XCC(inst_mask); i++) {
r = amdgpu_bo_create_reserved(adev, fw_size,
PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM,
@@ -744,10 +744,10 @@ static int sdma_v7_1_soft_reset(struct amdgpu_ip_block *ip_block)
u32 tmp;
int i;
- inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
+ inst_mask = adev->sdma.sdma_mask;
sdma_v7_1_inst_gfx_stop(adev, inst_mask);
- for_each_inst(i, inst_mask) {
+ for (i = 0; i < NUM_XCC(inst_mask); i++) {
//tmp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_FREEZE));
//tmp |= SDMA0_SDMA_FREEZE__FREEZE_MASK;
//WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_FREEZE), tmp);
@@ -1288,10 +1288,14 @@ static int sdma_v7_1_sw_init(struct amdgpu_ip_block *ip_block)
ring->ring_obj = NULL;
ring->use_doorbell = true;
ring->me = i;
- xcc_id = adev->sdma.instance[i].xcc_id;
+
+ for (xcc_id = 0; xcc_id < fls(adev->gfx.xcc_mask); xcc_id++) {
+ if (adev->sdma.instance[i].xcc_id == GET_INST(GC, xcc_id))
+ break;
+ }
DRM_DEBUG("SDMA%d.%d use_doorbell being set to: [%s]\n",
- xcc_id, i % adev->sdma.num_inst_per_xcc,
+ xcc_id, GET_INST(SDMA0, i) % adev->sdma.num_inst_per_xcc,
ring->use_doorbell?"true":"false");
ring->doorbell_index =
@@ -1299,7 +1303,7 @@ static int sdma_v7_1_sw_init(struct amdgpu_ip_block *ip_block)
ring->vm_hub = AMDGPU_GFXHUB(xcc_id);
sprintf(ring->name, "sdma%d.%d", xcc_id,
- i % adev->sdma.num_inst_per_xcc);
+ GET_INST(SDMA0, i) % adev->sdma.num_inst_per_xcc);
r = amdgpu_ring_init(adev, ring, 1024,
&adev->sdma.trap_irq,
AMDGPU_SDMA_IRQ_INSTANCE0 + i,
@@ -1334,11 +1338,8 @@ static int sdma_v7_1_sw_init(struct amdgpu_ip_block *ip_block)
static int sdma_v7_1_sw_fini(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
- uint32_t inst_mask;
int i;
- inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
-
for (i = 0; i < adev->sdma.num_instances; i++)
amdgpu_ring_fini(&adev->sdma.instance[i].ring);
@@ -1346,7 +1347,7 @@ static int sdma_v7_1_sw_fini(struct amdgpu_ip_block *ip_block)
amdgpu_sdma_destroy_inst_ctx(adev, true);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT)
- sdma_v7_1_inst_free_ucode_buffer(adev, inst_mask);
+ sdma_v7_1_inst_free_ucode_buffer(adev, adev->sdma.sdma_mask);
kfree(adev->sdma.ip_dump);
@@ -1356,24 +1357,19 @@ static int sdma_v7_1_sw_fini(struct amdgpu_ip_block *ip_block)
static int sdma_v7_1_hw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
- uint32_t inst_mask;
-
- inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
- return sdma_v7_1_inst_start(adev, inst_mask);
+ return sdma_v7_1_inst_start(adev, adev->sdma.sdma_mask);
}
static int sdma_v7_1_hw_fini(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
- uint32_t inst_mask;
if (amdgpu_sriov_vf(adev))
return 0;
- inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
- sdma_v7_1_inst_ctx_switch_enable(adev, false, inst_mask);
- sdma_v7_1_inst_enable(adev, false, inst_mask);
+ sdma_v7_1_inst_ctx_switch_enable(adev, false, adev->sdma.sdma_mask);
+ sdma_v7_1_inst_enable(adev, false, adev->sdma.sdma_mask);
return 0;
}
@@ -1493,7 +1489,7 @@ static int sdma_v7_1_process_trap_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
- int instances, queue, xcc_id = 0;
+ int inst, instances, queue, xcc_id = 0;
uint32_t mes_queue_id = entry->src_data[0];
DRM_DEBUG("IH: SDMA trap\n");
@@ -1518,8 +1514,12 @@ static int sdma_v7_1_process_trap_irq(struct amdgpu_device *adev,
xcc_id = adev->gfx.funcs->ih_node_to_logical_xcc(adev, entry->node_id);
else
dev_warn(adev->dev, "IH: SDMA may get wrong xcc id as gfx function not available\n");
- instances = ((entry->ring_id & 0xf0) >> 4) +
- xcc_id * adev->sdma.num_inst_per_xcc;
+ inst = ((entry->ring_id & 0xf0) >> 4) +
+ GET_INST(GC, xcc_id) * adev->sdma.num_inst_per_xcc;
+ for (instances = 0; instances < adev->sdma.num_instances; instances++) {
+ if (inst == GET_INST(SDMA0, instances))
+ break;
+ }
if (instances > adev->sdma.num_instances - 1) {
DRM_ERROR("IH: wrong ring_ID detected, as wrong sdma instance\n");
return -EINVAL;
--
2.52.0
^ permalink raw reply related [flat|nested] 20+ messages in thread* [PATCH] drm/amdgpu: support xcc harvest for ih translate
2025-12-10 7:13 [PATCH] drm/amdgpu: Flush TLB on all XCCs on GFX 12.1 Alex Deucher
` (11 preceding siblings ...)
2025-12-10 7:14 ` [PATCH] drm/amdgpu: Correct inst_id input from physical to logic Alex Deucher
@ 2025-12-10 7:14 ` Alex Deucher
2025-12-10 7:14 ` [PATCH] drm/amdgpu: normalize reg addr as local xcc for gfx v12_1 Alex Deucher
` (5 subsequent siblings)
18 siblings, 0 replies; 20+ messages in thread
From: Alex Deucher @ 2025-12-10 7:14 UTC (permalink / raw)
To: amd-gfx; +Cc: Likun Gao, Hawking Zhang, Alex Deucher
From: Likun Gao <Likun.Gao@amd.com>
Support xcc harvesting when translating an IH node to a logical xcc.
V2: Only check available instances.
Signed-off-by: Likun Gao <Likun.Gao@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
index 2f94c44bd6e11..26f8394bdce4b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
@@ -700,14 +700,16 @@ static int gfx_v12_1_get_xccs_per_xcp(struct amdgpu_device *adev)
static int gfx_v12_1_ih_to_xcc_inst(struct amdgpu_device *adev, int ih_node)
{
+ int logic_xcc;
int xcc = (ih_node & 0x7) - 2 + (ih_node >> 3) * 4;
- if (xcc < 0 || xcc >= hweight8(adev->gfx.xcc_mask)) {
- dev_err(adev->dev, "Couldn't find xcc mapping from IH node");
- return -EINVAL;
+ for (logic_xcc = 0; logic_xcc < NUM_XCC(adev->gfx.xcc_mask); logic_xcc++) {
+ if (xcc == GET_INST(GC, logic_xcc))
+ return logic_xcc;
}
- return xcc;
+ dev_err(adev->dev, "Couldn't find xcc mapping from IH node");
+ return -EINVAL;
}
static const struct amdgpu_gfx_funcs gfx_v12_1_gfx_funcs = {
--
2.52.0
^ permalink raw reply related [flat|nested] 20+ messages in thread* [PATCH] drm/amdgpu: normalize reg addr as local xcc for gfx v12_1
2025-12-10 7:13 [PATCH] drm/amdgpu: Flush TLB on all XCCs on GFX 12.1 Alex Deucher
` (12 preceding siblings ...)
2025-12-10 7:14 ` [PATCH] drm/amdgpu: support xcc harvest for ih translate Alex Deucher
@ 2025-12-10 7:14 ` Alex Deucher
2025-12-10 7:14 ` [PATCH] drm/amdgpu/mes_v12_1: fix mes access xcd register Alex Deucher
` (4 subsequent siblings)
18 siblings, 0 replies; 20+ messages in thread
From: Alex Deucher @ 2025-12-10 7:14 UTC (permalink / raw)
To: amd-gfx; +Cc: Likun Gao, Lijo Lazar, Alex Deucher
From: Likun Gao <Likun.Gao@amd.com>
Normalize register addresses to local xcc addresses for gfx v12_1.
Signed-off-by: Likun Gao <Likun.Gao@amd.com>
Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c | 30 ++++++++++++++++++++++++++
1 file changed, 30 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
index 26f8394bdce4b..f4b31752c6530 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
@@ -59,6 +59,13 @@ MODULE_FIRMWARE("amdgpu/gc_12_1_0_rlc.bin");
(SH_MEM_ALIGNMENT_MODE_UNALIGNED_GFX12_1_0 << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
(3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))
+#define XCC_REG_RANGE_0_LOW 0x1260 /* XCC gfxdec0 lower bound (inclusive) */
+#define XCC_REG_RANGE_0_HIGH 0x3C00 /* XCC gfxdec0 upper bound (exclusive) */
+#define XCC_REG_RANGE_1_LOW 0xA000 /* XCC gfxdec1 lower bound (inclusive) */
+#define XCC_REG_RANGE_1_HIGH 0x10000 /* XCC gfxdec1 upper bound (exclusive) */
+#define NORMALIZE_XCC_REG_OFFSET(offset) \
+ ((offset) & 0xFFFF) /* keep only the low 16 bits; arg parenthesized so compound expressions mask correctly */
+
static void gfx_v12_1_xcc_disable_gpa_mode(struct amdgpu_device *adev, int xcc_id);
static void gfx_v12_1_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v12_1_set_irq_funcs(struct amdgpu_device *adev);
@@ -220,11 +227,30 @@ static void gfx_v12_1_set_kiq_pm4_funcs(struct amdgpu_device *adev)
adev->gfx.kiq[i].pmf = &gfx_v12_1_kiq_pm4_funcs;
}
+static uint32_t gfx_v12_1_normalize_xcc_reg_offset(uint32_t reg)
+{
+ uint32_t normalized_reg = NORMALIZE_XCC_REG_OFFSET(reg);
+
+ /* If the low 16 bits of reg fall inside one of the two XCC register
+ ranges, return only those bits; otherwise return reg unchanged */
+
+ if (((normalized_reg >= XCC_REG_RANGE_0_LOW) && (normalized_reg < XCC_REG_RANGE_0_HIGH)) ||
+ ((normalized_reg >= XCC_REG_RANGE_1_LOW) && (normalized_reg < XCC_REG_RANGE_1_HIGH)))
+ return normalized_reg;
+ else
+ return reg;
+}
+
static void gfx_v12_1_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
int mem_space, int opt, uint32_t addr0,
uint32_t addr1, uint32_t ref,
uint32_t mask, uint32_t inv)
{
+ if (mem_space == 0) {
+ addr0 = gfx_v12_1_normalize_xcc_reg_offset(addr0);
+ addr1 = gfx_v12_1_normalize_xcc_reg_offset(addr1);
+ }
+
amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
amdgpu_ring_write(ring,
/* memory (1) or register (0) */
@@ -3338,6 +3364,8 @@ static void gfx_v12_1_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
{
struct amdgpu_device *adev = ring->adev;
+ reg = gfx_v12_1_normalize_xcc_reg_offset(reg);
+
amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
amdgpu_ring_write(ring, 0 | /* src: register*/
(5 << 8) | /* dst: memory */
@@ -3356,6 +3384,8 @@ static void gfx_v12_1_ring_emit_wreg(struct amdgpu_ring *ring,
{
uint32_t cmd = 0;
+ reg = gfx_v12_1_normalize_xcc_reg_offset(reg);
+
switch (ring->funcs->type) {
case AMDGPU_RING_TYPE_KIQ:
cmd = (1 << 16); /* no inc addr */
--
2.52.0
^ permalink raw reply related [flat|nested] 20+ messages in thread* [PATCH] drm/amdgpu/mes_v12_1: fix mes access xcd register
2025-12-10 7:13 [PATCH] drm/amdgpu: Flush TLB on all XCCs on GFX 12.1 Alex Deucher
` (13 preceding siblings ...)
2025-12-10 7:14 ` [PATCH] drm/amdgpu: normalize reg addr as local xcc for gfx v12_1 Alex Deucher
@ 2025-12-10 7:14 ` Alex Deucher
2025-12-10 7:14 ` [PATCH] drm/amdgpu: add gfx sysfs support for gfx_v12_1 Alex Deucher
` (3 subsequent siblings)
18 siblings, 0 replies; 20+ messages in thread
From: Alex Deucher @ 2025-12-10 7:14 UTC (permalink / raw)
To: amd-gfx; +Cc: Jack Xiao, Likun Gao, Alex Deucher
From: Jack Xiao <Jack.Xiao@amd.com>
Use the register offset local to the die when the MES firmware accesses a
local/remote xcd register.
Signed-off-by: Jack Xiao <Jack.Xiao@amd.com>
Reviewed-by: Likun Gao <Likun.Gao@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
drivers/gpu/drm/amd/amdgpu/mes_v12_1.c | 20 ++++++++++++++++++++
1 file changed, 20 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c
index 6a454d5eb1a3b..b0b1df5f1d4b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c
@@ -546,6 +546,10 @@ static int mes_v12_1_misc_op(struct amdgpu_mes *mes,
mes_v12_1_get_rrmt(input->read_reg.reg_offset,
GET_INST(GC, input->xcc_id),
&misc_pkt.read_reg.rrmt_opt);
+ if (misc_pkt.read_reg.rrmt_opt.mode != MES_RRMT_MODE_REMOTE_MID) {
+ misc_pkt.read_reg.reg_offset =
+ NORMALIZE_XCC_REG_OFFSET(misc_pkt.read_reg.reg_offset);
+ }
break;
case MES_MISC_OP_WRITE_REG:
misc_pkt.opcode = MESAPI_MISC__WRITE_REG;
@@ -554,6 +558,10 @@ static int mes_v12_1_misc_op(struct amdgpu_mes *mes,
mes_v12_1_get_rrmt(input->write_reg.reg_offset,
GET_INST(GC, input->xcc_id),
&misc_pkt.write_reg.rrmt_opt);
+ if (misc_pkt.write_reg.rrmt_opt.mode != MES_RRMT_MODE_REMOTE_MID) {
+ misc_pkt.write_reg.reg_offset =
+ NORMALIZE_XCC_REG_OFFSET(misc_pkt.write_reg.reg_offset);
+ }
break;
case MES_MISC_OP_WRM_REG_WAIT:
misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM;
@@ -565,6 +573,10 @@ static int mes_v12_1_misc_op(struct amdgpu_mes *mes,
mes_v12_1_get_rrmt(input->wrm_reg.reg0,
GET_INST(GC, input->xcc_id),
&misc_pkt.wait_reg_mem.rrmt_opt1);
+ if (misc_pkt.wait_reg_mem.rrmt_opt1.mode != MES_RRMT_MODE_REMOTE_MID) {
+ misc_pkt.wait_reg_mem.reg_offset1 =
+ NORMALIZE_XCC_REG_OFFSET(misc_pkt.wait_reg_mem.reg_offset1);
+ }
break;
case MES_MISC_OP_WRM_REG_WR_WAIT:
misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM;
@@ -579,6 +591,14 @@ static int mes_v12_1_misc_op(struct amdgpu_mes *mes,
mes_v12_1_get_rrmt(input->wrm_reg.reg1,
GET_INST(GC, input->xcc_id),
&misc_pkt.wait_reg_mem.rrmt_opt2);
+ if (misc_pkt.wait_reg_mem.rrmt_opt1.mode != MES_RRMT_MODE_REMOTE_MID) {
+ misc_pkt.wait_reg_mem.reg_offset1 =
+ NORMALIZE_XCC_REG_OFFSET(misc_pkt.wait_reg_mem.reg_offset1);
+ }
+ if (misc_pkt.wait_reg_mem.rrmt_opt2.mode != MES_RRMT_MODE_REMOTE_MID) {
+ misc_pkt.wait_reg_mem.reg_offset2 =
+ NORMALIZE_XCC_REG_OFFSET(misc_pkt.wait_reg_mem.reg_offset2);
+ }
break;
case MES_MISC_OP_SET_SHADER_DEBUGGER:
pipe = AMDGPU_MES_SCHED_PIPE;
--
2.52.0
^ permalink raw reply related [flat|nested] 20+ messages in thread* [PATCH] drm/amdgpu: add gfx sysfs support for gfx_v12_1
2025-12-10 7:13 [PATCH] drm/amdgpu: Flush TLB on all XCCs on GFX 12.1 Alex Deucher
` (14 preceding siblings ...)
2025-12-10 7:14 ` [PATCH] drm/amdgpu/mes_v12_1: fix mes access xcd register Alex Deucher
@ 2025-12-10 7:14 ` Alex Deucher
2025-12-10 7:14 ` [PATCH] drm/amdgpu: correct rlc autoload for xcc harvest Alex Deucher
` (2 subsequent siblings)
18 siblings, 0 replies; 20+ messages in thread
From: Alex Deucher @ 2025-12-10 7:14 UTC (permalink / raw)
To: amd-gfx; +Cc: Likun Gao, Hawking Zhang, Alex Deucher
From: Likun Gao <Likun.Gao@amd.com>
Add gfx sysfs support for gfx_v12_1.
Signed-off-by: Likun Gao <Likun.Gao@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
index f4b31752c6530..3a641d223ad68 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
@@ -1250,6 +1250,10 @@ static int gfx_v12_1_sw_init(struct amdgpu_ip_block *ip_block)
if (r)
return r;
+ r = amdgpu_gfx_sysfs_init(adev);
+ if (r)
+ return r;
+
return 0;
}
--
2.52.0
^ permalink raw reply related [flat|nested] 20+ messages in thread* [PATCH] drm/amdgpu: correct rlc autoload for xcc harvest
2025-12-10 7:13 [PATCH] drm/amdgpu: Flush TLB on all XCCs on GFX 12.1 Alex Deucher
` (15 preceding siblings ...)
2025-12-10 7:14 ` [PATCH] drm/amdgpu: add gfx sysfs support for gfx_v12_1 Alex Deucher
@ 2025-12-10 7:14 ` Alex Deucher
2025-12-10 7:14 ` [PATCH] drm/amdkfd: Override KFD SVM mappings for GFX 12.1 Alex Deucher
2025-12-10 7:14 ` [PATCH] drm/amdgpu: Add gfx v12_1 interrupt source header Alex Deucher
18 siblings, 0 replies; 20+ messages in thread
From: Alex Deucher @ 2025-12-10 7:14 UTC (permalink / raw)
To: amd-gfx; +Cc: Likun Gao, Hawking Zhang, Alex Deucher
From: Likun Gao <Likun.Gao@amd.com>
If a firmware has RLC_NUM_INS_CODE0 instances (only 1 instance), it needs
to be copied directly for RLC autoload.
For firmware with more than one instance, only copy the instances of
enabled XCCs to save copy time.
Signed-off-by: Likun Gao <Likun.Gao@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
index 3a641d223ad68..96ca3648205d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
@@ -933,7 +933,8 @@ static void gfx_v12_1_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *ade
fw_size = toc_fw_inst_size;
for (i = 0; i < num_inst; i++) {
- if ((1 << (i / 2)) & adev->gfx.xcc_mask) {
+ if ((num_inst == RLC_NUM_INS_CODE0) ||
+ ((1 << (i / 2)) & adev->gfx.xcc_mask)) {
memcpy(ptr + toc_offset + i * toc_fw_inst_size, fw_data, fw_size);
if (fw_size < toc_fw_inst_size)
--
2.52.0
^ permalink raw reply related [flat|nested] 20+ messages in thread* [PATCH] drm/amdkfd: Override KFD SVM mappings for GFX 12.1
2025-12-10 7:13 [PATCH] drm/amdgpu: Flush TLB on all XCCs on GFX 12.1 Alex Deucher
` (16 preceding siblings ...)
2025-12-10 7:14 ` [PATCH] drm/amdgpu: correct rlc autoload for xcc harvest Alex Deucher
@ 2025-12-10 7:14 ` Alex Deucher
2025-12-10 7:14 ` [PATCH] drm/amdgpu: Add gfx v12_1 interrupt source header Alex Deucher
18 siblings, 0 replies; 20+ messages in thread
From: Alex Deucher @ 2025-12-10 7:14 UTC (permalink / raw)
To: amd-gfx; +Cc: Mukul Joshi, Alex Sierra, Alex Deucher
From: Mukul Joshi <mukul.joshi@amd.com>
Override the local MTYPE mappings in KFD SVM code with mtype_local
modprobe param for GFX 12.1.
Signed-off-by: Mukul Joshi <mukul.joshi@amd.com>
Reviewed-by: Alex Sierra <alex.sierra@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 3235774f3b64c..1ed08388d3646 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1309,9 +1309,11 @@ svm_range_get_pte_flags(struct kfd_node *node, struct amdgpu_vm *vm,
case IP_VERSION(12, 1, 0):
snoop = true;
if (domain == SVM_RANGE_VRAM_DOMAIN) {
+ mtype_local = amdgpu_mtype_local == 1 ? AMDGPU_VM_MTYPE_NC :
+ AMDGPU_VM_MTYPE_RW;
/* local HBM */
if (bo_node->adev == node->adev)
- mapping_flags |= AMDGPU_VM_MTYPE_RW;
+ mapping_flags |= mtype_local;
/* Remote GPU memory */
else
mapping_flags |= ext_coherent ? AMDGPU_VM_MTYPE_UC :
--
2.52.0
^ permalink raw reply related [flat|nested] 20+ messages in thread* [PATCH] drm/amdgpu: Add gfx v12_1 interrupt source header
2025-12-10 7:13 [PATCH] drm/amdgpu: Flush TLB on all XCCs on GFX 12.1 Alex Deucher
` (17 preceding siblings ...)
2025-12-10 7:14 ` [PATCH] drm/amdkfd: Override KFD SVM mappings for GFX 12.1 Alex Deucher
@ 2025-12-10 7:14 ` Alex Deucher
18 siblings, 0 replies; 20+ messages in thread
From: Alex Deucher @ 2025-12-10 7:14 UTC (permalink / raw)
To: amd-gfx; +Cc: Hawking Zhang, Likun Gao, Alex Deucher
From: Hawking Zhang <Hawking.Zhang@amd.com>
To accommodate the specific interrupt sources for gfx v12_1
Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Likun Gao <Likun.Gao@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c | 8 +-
drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c | 4 +-
.../include/ivsrcid/gfx/irqsrcs_gfx_12_1_0.h | 136 ++++++++++++++++++
3 files changed, 142 insertions(+), 6 deletions(-)
create mode 100644 drivers/gpu/drm/amd/include/ivsrcid/gfx/irqsrcs_gfx_12_1_0.h
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
index 96ca3648205d6..6a4ecded103aa 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
@@ -37,7 +37,7 @@
#include "gc/gc_12_1_0_offset.h"
#include "gc/gc_12_1_0_sh_mask.h"
#include "soc24_enum.h"
-#include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"
+#include "ivsrcid/gfx/irqsrcs_gfx_12_1_0.h"
#include "soc15.h"
#include "clearstate_gfx12.h"
@@ -1170,21 +1170,21 @@ static int gfx_v12_1_sw_init(struct amdgpu_ip_block *ip_block)
/* EOP Event */
r = amdgpu_irq_add_id(adev, SOC_V1_0_IH_CLIENTID_GRBM_CP,
- GFX_11_0_0__SRCID__CP_EOP_INTERRUPT,
+ GFX_12_1_0__SRCID__CP_EOP_INTERRUPT,
&adev->gfx.eop_irq);
if (r)
return r;
/* Privileged reg */
r = amdgpu_irq_add_id(adev, SOC_V1_0_IH_CLIENTID_GRBM_CP,
- GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT,
+ GFX_12_1_0__SRCID__CP_PRIV_REG_FAULT,
&adev->gfx.priv_reg_irq);
if (r)
return r;
/* Privileged inst */
r = amdgpu_irq_add_id(adev, SOC_V1_0_IH_CLIENTID_GRBM_CP,
- GFX_11_0_0__SRCID__CP_PRIV_INSTR_FAULT,
+ GFX_12_1_0__SRCID__CP_PRIV_INSTR_FAULT,
&adev->gfx.priv_inst_irq);
if (r)
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c
index e3963675bfac0..fe0e84b45cf4b 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c
@@ -32,7 +32,7 @@
#include "gc/gc_12_1_0_offset.h"
#include "gc/gc_12_1_0_sh_mask.h"
-#include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"
+#include "ivsrcid/gfx/irqsrcs_gfx_12_1_0.h"
#include "soc15_common.h"
#include "soc15.h"
@@ -1278,7 +1278,7 @@ static int sdma_v7_1_sw_init(struct amdgpu_ip_block *ip_block)
/* SDMA trap event */
r = amdgpu_irq_add_id(adev, SOC_V1_0_IH_CLIENTID_GFX,
- GFX_11_0_0__SRCID__SDMA_TRAP,
+ GFX_12_1_0__SRCID__SDMA_TRAP,
&adev->sdma.trap_irq);
if (r)
return r;
diff --git a/drivers/gpu/drm/amd/include/ivsrcid/gfx/irqsrcs_gfx_12_1_0.h b/drivers/gpu/drm/amd/include/ivsrcid/gfx/irqsrcs_gfx_12_1_0.h
new file mode 100644
index 0000000000000..9fe5466e94183
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/ivsrcid/gfx/irqsrcs_gfx_12_1_0.h
@@ -0,0 +1,136 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __IRQSRCS_GFX_12_1_0_H__
+#define __IRQSRCS_GFX_12_1_0_H__
+
+/* 0x0 UTCL2 has encountered a fault scenario */
+#define GFX_12_1_0__SRCID__UTCL2_FAULT 0
+/* 0x1 UTCL2 has encountered a retry scenario */
+#define GFX_12_1_0__SRCID__UTCL2_RETRY 1
+/* 0x2 UTCL2 for data poisoning */
+#define GFX_12_1_0__SRCID__UTCL2_DATA_POISONING 2
+/* 0x30 SDMA atomic*_rtn ops complete */
+#define GFX_12_1_0__SRCID__SDMA_ATOMIC_RTN_DONE 48
+/* 0x31 Trap */
+#define GFX_12_1_0__SRCID__SDMA_TRAP 49
+/* 0x32 SRBM write Protection */
+#define GFX_12_1_0__SRCID__SDMA_SRBMWRITE 50
+/* 0x33 Context Empty */
+#define GFX_12_1_0__SRCID__SDMA_CTXEMPTY 51
+/* 0x34 SDMA New Run List */
+#define GFX_12_1_0__SRCID__SDMA_PREEMPT 52
+/* 0x35 sdma mid - command buffer preempt interrupt */
+#define GFX_12_1_0__SRCID__SDMA_IB_PREEMPT 53
+/* 0x36 Doorbell BE invalid */
+#define GFX_12_1_0__SRCID__SDMA_DOORBELL_INVALID 54
+/* 0x37 Queue hang or Command timeout */
+#define GFX_12_1_0__SRCID__SDMA_QUEUE_HANG 55
+/* 0x38 SDMA atomic CMPSWAP loop timeout */
+#define GFX_12_1_0__SRCID__SDMA_ATOMIC_TIMEOUT 56
+/* 0x39 SRBM read poll timeout */
+#define GFX_12_1_0__SRCID__SDMA_POLL_TIMEOUT 57
+/* 0x3A Page retry timeout after UTCL2 return nack = 1 */
+#define GFX_12_1_0__SRCID__SDMA_PAGE_TIMEOUT 58
+/* 0x3B Page Null from UTCL2 when nack = 2 */
+#define GFX_12_1_0__SRCID__SDMA_PAGE_NULL 59
+/* 0x3C Page Fault Error from UTCL2 when nack = 3 */
+#define GFX_12_1_0__SRCID__SDMA_PAGE_FAULT 60
+/* 0x3D MC or SEM address in VM hole */
+#define GFX_12_1_0__SRCID__SDMA_INVALID_ADDR 61
+/* 0x3E ECC Error */
+#define GFX_12_1_0__SRCID__SDMA_ECC 62
+/* 0x3F SDMA Frozen */
+#define GFX_12_1_0__SRCID__SDMA_FROZEN 63
+/* 0x40 SRAM ECC Error */
+#define GFX_12_1_0__SRCID__SDMA_SRAM_ECC 64
+/* 0x41 GPF(Sem incomplete timeout) */
+#define GFX_12_1_0__SRCID__SDMA_SEM_INCOMPLETE_TIMEOUT 65
+/* 0x42 Semaphore wait fail timeout */
+#define GFX_12_1_0__SRCID__SDMA_SEM_WAIT_FAIL_TIMEOUT 66
+/* 0x43 Wptr less than Rptr in active queue */
+#define GFX_12_1_0__SRCID__SDMA_INVALID_RB_PTR 67
+/* 0x44 BE command exception */
+#define GFX_12_1_0__SRCID__SDMA_BE_EXCEPTION 68
+/* 0x46 User fence. inherit from gfx v12_0 for gfx user queue */
+#define GFX_12_1_0__SRCID__SDMA_FENCE 70
+/* 0xB0 CP_INTERRUPT pkt in RB */
+#define GFX_12_1_0__SRCID__CP_RB_INT_PKT 176
+/* 0xB1 CP_INTERRUPT pkt in IB1 */
+#define GFX_12_1_0__SRCID__CP_IB1_INT_PKT 177
+/* 0xB2 CP_INTERRUPT pkt in IB2 */
+#define GFX_12_1_0__SRCID__CP_IB2_INT_PKT 178
+/* 0xB3 DMA Watch Interrupt */
+#define GFX_12_1_0__SRCID__CP_DMA_WATCH_INTERRUPT 179
+/* 0xB4 PM4 Pkt Rsvd Bits Error */
+#define GFX_12_1_0__SRCID__CP_PM4_PKT_RSVD_BIT_ERROR 180
+/* 0xB5 End-of-Pipe Interrupt */
+#define GFX_12_1_0__SRCID__CP_EOP_INTERRUPT 181
+/* 0xB7 Bad Opcode Error */
+#define GFX_12_1_0__SRCID__CP_BAD_OPCODE_ERROR 183
+/* 0xB8 Privileged Register Fault */
+#define GFX_12_1_0__SRCID__CP_PRIV_REG_FAULT 184
+/* 0xB9 Privileged Instr Fault */
+#define GFX_12_1_0__SRCID__CP_PRIV_INSTR_FAULT 185
+/* 0xBA Wait Memory Semaphore Fault (Sync Object Fault) */
+#define GFX_12_1_0__SRCID__CP_WAIT_MEM_SEM_FAULT 186
+/* 0xBB Context Empty Interrupt */
+#define GFX_12_1_0__SRCID__CP_CTX_EMPTY_INTERRUPT 187
+/* 0xBC Context Busy Interrupt */
+#define GFX_12_1_0__SRCID__CP_CTX_BUSY_INTERRUPT 188
+/* 0xC0 CP.ME Wait_Reg_Mem Poll Timeout */
+#define GFX_12_1_0__SRCID__CP_ME_WAIT_REG_MEM_POLL_TIMEOUT 192
+/* 0xC1 Surface Probe Fault Signal Incomplete */
+#define GFX_12_1_0__SRCID__CP_SIG_INCOMPLETE 193
+/* 0xC2 Preemption Acknowledge */
+#define GFX_12_1_0__SRCID__CP_PREEMPT_ACK 194
+/* 0xC3 General Protection Fault (GPF) */
+#define GFX_12_1_0__SRCID__CP_GPF 195
+/* 0xC4 GDS Alloc Error */
+#define GFX_12_1_0__SRCID__CP_GDS_ALLOC_ERROR 196
+/* 0xC5 ECC Error */
+#define GFX_12_1_0__SRCID__CP_ECC_ERROR 197
+/* 0xC8 Unattached VM Doorbell Received */
+#define GFX_12_1_0__SRCID__CP_VM_DOORBELL 200
+/* 0xC9 ECC FUE Error */
+#define GFX_12_1_0__SRCID__CP_FUE_ERROR 201
+/* 0xCA Suspend Completion Interrupt */
+#define GFX_12_1_0__SRCID__CP_SUSPEAND_REQ_INTERRUPT 202
+/* 0xCB Resume Completion Interrupt */
+#define GFX_12_1_0__SRCID__CP_RESUME_REQ_INTERRUPT 203
+/* 0xCA RLC Streaming Perf Monitor Interrupt
+ * ContextID[15:0] each bit indicates poison is seen on the respective indexed VMID
+ * Ex: ContextID[3] == 1 means VMID-3 encountered poison consumption
+ * ContextID[16] == 1 indicates that complete VF need to reset with FLR */
+#define GFX_12_1_0__SRCID__RLC_STRM_PERF_MONITOR_INTERRUPT 202
+/* 0xCB RLC Poison Interrupt */
+#define GFX_12_1_0__SRCID__RLC_POISON_INTERRUPT 203
+/* 0xE7 High on ContextID[0] - nHT Error; ContextID[1] - illegal Opcode Error */
+#define GFX_12_1_0__SRCID__PMR_EA_ERROR_INTERRUPT 231
+/* 0xE8 CRead timeout error */
+#define GFX_12_1_0__SRCID__GRBM_RD_TIMEOUT_ERROR 232
+/* 0xE9 Register GUI Idle */
+#define GFX_12_1_0__SRCID__GRBM_REG_GUI_IDLE 233
+/* 0xEF SQ Interrupt (ttrace wrap, errors) */
+#define GFX_12_1_0__SRCID__SQ_INTERRUPT_ID 239
+
+#endif
--
2.52.0
^ permalink raw reply related [flat|nested] 20+ messages in thread