* [PATCH] drm/amdgpu: update cp cmd pkt for gfx v12_1
@ 2026-04-22 21:50 Alex Deucher
2026-04-22 21:50 ` [PATCH] drm/amdgpu: In GFX12.1 CU is same as WGP Alex Deucher
2026-04-22 21:50 ` [PATCH] drm/amdgpu: Switch to gfx_v12_1_get_xccs_per_xcp Alex Deucher
0 siblings, 2 replies; 3+ messages in thread
From: Alex Deucher @ 2026-04-22 21:50 UTC (permalink / raw)
To: amd-gfx; +Cc: Likun Gao, Hawking Zhang, Alex Deucher
From: Likun Gao <Likun.Gao@amd.com>
Update the CP command packet definitions for gfx v12.1.
v2: squash in update (Alex)
Signed-off-by: Likun Gao <Likun.Gao@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c | 34 +-
drivers/gpu/drm/amd/amdgpu/gfx_v12_1_pkt.h | 347 +++++++++++++++------
2 files changed, 273 insertions(+), 108 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
index 948758b51b5cc..f4089ab108474 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
@@ -243,9 +243,9 @@ static void gfx_v12_1_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
amdgpu_ring_write(ring,
/* memory (1) or register (0) */
- (WAIT_REG_MEM_MEM_SPACE(mem_space) |
- WAIT_REG_MEM_OPERATION(opt) | /* wait */
- WAIT_REG_MEM_FUNCTION(3))); /* equal */
+ (PACKET3_WAIT_REG_MEM__MEM_SPACE(mem_space) |
+ PACKET3_WAIT_REG_MEM__OPERATION(opt) | /* wait */
+ PACKET3_WAIT_REG_MEM__FUNCTION(3))); /* equal */
if (mem_space)
BUG_ON(addr0 & 0x3); /* Dword align */
@@ -335,7 +335,7 @@ static int gfx_v12_1_ring_test_ib(struct amdgpu_ring *ring, long timeout)
}
ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
- ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
+ ib.ptr[1] = PACKET3_WRITE_DATA__DST_SEL(5) | PACKET3_WRITE_DATA__WR_CONFIRM(1);
ib.ptr[2] = lower_32_bits(gpu_addr);
ib.ptr[3] = upper_32_bits(gpu_addr);
ib.ptr[4] = 0xDEADBEEF;
@@ -3366,7 +3366,7 @@ static void gfx_v12_1_ring_emit_ib_compute(struct amdgpu_ring *ring,
uint32_t flags)
{
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
- u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
+ u32 control = PACKET3_INDIRECT_BUFFER__VALID(1) | ib->length_dw | (vmid << 24);
/* Currently, there is a high possibility to get wave ID mismatch
* between ME and GDS, leading to a hw deadlock, because ME generates
@@ -3402,15 +3402,15 @@ static void gfx_v12_1_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
/* RELEASE_MEM - flush caches, send int */
amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
- amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ(1) |
- PACKET3_RELEASE_MEM_GCR_GLV_WB |
- PACKET3_RELEASE_MEM_GCR_GL2_WB |
- PACKET3_RELEASE_MEM_GCR_GL2_SCOPE(2) |
- PACKET3_RELEASE_MEM_TEMPORAL(3) |
- PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
- PACKET3_RELEASE_MEM_EVENT_INDEX(5)));
- amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) |
- PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0)));
+ amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM__GCR_SEQ(1) |
+ PACKET3_RELEASE_MEM__GCR_GLV_WB |
+ PACKET3_RELEASE_MEM__GCR_GL2_WB |
+ PACKET3_RELEASE_MEM__GCR_GL2_SCOPE(2) |
+ PACKET3_RELEASE_MEM__TEMPORAL(3) |
+ PACKET3_RELEASE_MEM__EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
+ PACKET3_RELEASE_MEM__EVENT_INDEX(5)));
+ amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM__DATA_SEL(write64bit ? 2 : 1) |
+ PACKET3_RELEASE_MEM__INT_SEL(int_sel ? 2 : 0)));
/*
* the address should be Qword aligned if 64bit write, Dword
@@ -3471,7 +3471,7 @@ static void gfx_v12_1_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
/* write fence seq to the "addr" */
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
- amdgpu_ring_write(ring, (WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
+ amdgpu_ring_write(ring, (PACKET3_WRITE_DATA__DST_SEL(5) | PACKET3_WRITE_DATA__WR_CONFIRM(1)));
amdgpu_ring_write(ring, lower_32_bits(addr));
amdgpu_ring_write(ring, upper_32_bits(addr));
amdgpu_ring_write(ring, lower_32_bits(seq));
@@ -3479,7 +3479,7 @@ static void gfx_v12_1_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
if (flags & AMDGPU_FENCE_FLAG_INT) {
/* set register to trigger INT */
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
- amdgpu_ring_write(ring, (WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
+ amdgpu_ring_write(ring, (PACKET3_WRITE_DATA__DST_SEL(0) | PACKET3_WRITE_DATA__WR_CONFIRM(1)));
amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regCPC_INT_STATUS));
amdgpu_ring_write(ring, 0);
amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
@@ -3518,7 +3518,7 @@ static void gfx_v12_1_ring_emit_wreg(struct amdgpu_ring *ring,
cmd = (1 << 16); /* no inc addr */
break;
default:
- cmd = WR_CONFIRM;
+ cmd = PACKET3_WRITE_DATA__WR_CONFIRM(1);
break;
}
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1_pkt.h b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1_pkt.h
index df0ada0b56d1d..77b4f6ea532f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1_pkt.h
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1_pkt.h
@@ -58,6 +58,29 @@
#define PACKET3_DISPATCH_DIRECT 0x15
#define PACKET3_DISPATCH_INDIRECT 0x16
#define PACKET3_ATOMIC_MEM 0x1E
+#define PACKET3_ATOMIC_MEM__ATOMIC(x) ((((unsigned)(x)) & 0x7F) << 0)
+#define PACKET3_ATOMIC_MEM__COMMAND(x) ((((unsigned)(x)) & 0xF) << 8)
+#define PACKET3_ATOMIC_MEM__SCOPE(x) ((((unsigned)(x)) & 0x3) << 23)
+#define PACKET3_ATOMIC_MEM__TEMPORAL(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_ATOMIC_MEM__ADDR_LO(x) (((unsigned)(x)))
+#define PACKET3_ATOMIC_MEM__ADDR_HI(x) (((unsigned)(x)))
+#define PACKET3_ATOMIC_MEM__SRC_DATA_LO(x) (((unsigned)(x)))
+#define PACKET3_ATOMIC_MEM__SRC_DATA_HI(x) (((unsigned)(x)))
+#define PACKET3_ATOMIC_MEM__CMP_DATA_LO(x) (((unsigned)(x)))
+#define PACKET3_ATOMIC_MEM__CMP_DATA_HI(x) (((unsigned)(x)))
+#define PACKET3_ATOMIC_MEM__LOOP_INTERVAL(x) ((((unsigned)(x)) & 0x1FFF) << 0)
+#define PACKET3_ATOMIC_MEM__COMMAND__SINGLE_PASS_ATOMIC 0
+#define PACKET3_ATOMIC_MEM__COMMAND__LOOP_UNTIL_COMPARE_SATISFIED 1
+#define PACKET3_ATOMIC_MEM__COMMAND__WAIT_FOR_WRITE_CONFIRMATION 2
+#define PACKET3_ATOMIC_MEM__COMMAND__SEND_AND_CONTINUE 3
+#define PACKET3_ATOMIC_MEM__SCOPE__CU 0
+#define PACKET3_ATOMIC_MEM__SCOPE__SE 1
+#define PACKET3_ATOMIC_MEM__SCOPE__DEVICE 2
+#define PACKET3_ATOMIC_MEM__SCOPE__SYSTEM 3
+#define PACKET3_ATOMIC_MEM__TEMPORAL__RT 0
+#define PACKET3_ATOMIC_MEM__TEMPORAL__NT 1
+#define PACKET3_ATOMIC_MEM__TEMPORAL__FW 2
+#define PACKET3_ATOMIC_MEM__TEMPORAL__UC 3
#define PACKET3_OCCLUSION_QUERY 0x1F
#define PACKET3_SET_PREDICATION 0x20
#define PACKET3_REG_RMW 0x21
@@ -76,7 +99,11 @@
#define PACKET3_DRAW_INDEX_MULTI_AUTO 0x30
#define PACKET3_DRAW_INDEX_OFFSET_2 0x35
#define PACKET3_WRITE_DATA 0x37
-#define WRITE_DATA_DST_SEL(x) (((x) & 0xf) << 8)
+#define PACKET3_WRITE_DATA__DST_SEL(x) ((((unsigned)(x)) & 0xF) << 8)
+#define PACKET3_WRITE_DATA__DST_SEL__MEM_MAPPED_REGISTER 0
+#define PACKET3_WRITE_DATA__DST_SEL__TC_L2 2
+#define PACKET3_WRITE_DATA__DST_SEL__MEMORY 5
+#define PACKET3_WRITE_DATA__DST_SEL__MEMORY_MAPPED_ADC_PERSISTENT_STATE 6
/* 0 - register
* 1 - reserved
* 2 - tc_l2
@@ -85,27 +112,52 @@
* 5 - memory (same as tc_l2)
* 6 - memory_mapped_adc_persistent_state
*/
-#define WRITE_DATA_SCOPE(x) (((x) & 0x3) << 12)
-#define WRITE_DATA_MODE(x) (((x) & 0x3) << 14)
+#define PACKET3_WRITE_DATA__SCOPE(x) ((((unsigned)(x)) & 0x3) << 12)
+#define PACKET3_WRITE_DATA__SCOPE__CU 0
+#define PACKET3_WRITE_DATA__SCOPE__SE 1
+#define PACKET3_WRITE_DATA__SCOPE__DEVICE 2
+#define PACKET3_WRITE_DATA__SCOPE__SYSTEM 3
+#define PACKET3_WRITE_DATA__MODE(x) ((((unsigned)(x)) & 0x3) << 14)
+#define PACKET3_WRITE_DATA__MODE__LOCAL_XCD 0
+#define PACKET3_WRITE_DATA__MODE__REMOTE_OR_LOCAL_AID 1
+#define PACKET3_WRITE_DATA__MODE__REMOTE_XCD 2
+#define PACKET3_WRITE_DATA__MODE__REMOTE_MID 3
/* 0 - local xcd
* 1 - remote/local aid
* 2 - remote xcd
* 3 - remote mid
*/
-#define WRITE_DATA_ADDR_INCR (1 << 16)
-#define WRITE_DATA_MID_DIE_ID(x) (((x) & 0x3) << 18)
-#define WR_CONFIRM (1 << 20)
-#define WRITE_DATA_XCD_DIE_ID(x) (((x) & 0xf) << 21)
-#define WRITE_DATA_TEMPORAL(x) (((x) & 0x3) << 25)
- /* 0 - rt
- * 1 - nt
- * 2 - ht
- * 3 - lu
- */
-#define WRITE_DATA_COOP_DISABLE (1 << 27)
+#define PACKET3_WRITE_DATA__ADDR_INCR(x) ((((unsigned)(x)) & 0x1) << 16)
+#define PACKET3_WRITE_DATA__ADDR_INCR__INCREMENT_ADDRESS 0
+#define PACKET3_WRITE_DATA__ADDR_INCR__DO_NOT_INCREMENT_ADDRESS 1
+#define PACKET3_WRITE_DATA__MID_DIE_ID(x) ((((unsigned)(x)) & 0x3) << 18)
+#define PACKET3_WRITE_DATA__WR_CONFIRM(x) ((((unsigned)(x)) & 0x1) << 20)
+#define PACKET3_WRITE_DATA__WR_CONFIRM__DO_NOT_WAIT_FOR_WRITE_CONFIRMATION 0
+#define PACKET3_WRITE_DATA__WR_CONFIRM__WAIT_FOR_WRITE_CONFIRMATION 1
+#define PACKET3_WRITE_DATA__XCD_DIE_ID(x) ((((unsigned)(x)) & 0xF) << 21)
+#define PACKET3_WRITE_DATA__TEMPORAL(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_WRITE_DATA__TEMPORAL__RT 0
+#define PACKET3_WRITE_DATA__TEMPORAL__NT 1
+#define PACKET3_WRITE_DATA__TEMPORAL__HT 2
+#define PACKET3_WRITE_DATA__TEMPORAL__LU 3
+#define PACKET3_WRITE_DATA__COOP_DISABLE(x) ((((unsigned)(x)) & 0x1) << 27)
+#define PACKET3_WRITE_DATA__COOP_DISABLE__MASTER_AND_SLAVE_COOP 0
+#define PACKET3_WRITE_DATA__COOP_DISABLE__MASTER_ONLY 1
+#define PACKET3_WRITE_DATA__DST_MMREG_ADDR_LO(x) ((unsigned)(x))
+#define PACKET3_WRITE_DATA__DST_MEM_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_WRITE_DATA__DST_MMREG_ADDR_HI(x) ((((unsigned)(x)) & 0x3FFF) << 0)
+#define PACKET3_WRITE_DATA__DST_MEM_ADDR_HI(x) ((unsigned)(x))
+#define PACKET3_WRITE_DATA__DATA(x) ((unsigned)(x))
#define PACKET3_DRAW_INDEX_INDIRECT_MULTI 0x38
#define PACKET3_WAIT_REG_MEM 0x3C
-#define WAIT_REG_MEM_FUNCTION(x) (((x) & 0x7) << 0)
+#define PACKET3_WAIT_REG_MEM__FUNCTION(x) ((((unsigned)(x)) & 0x7) << 0)
+#define PACKET3_WAIT_REG_MEM__FUNCTION__ALWAYS_PASS 0
+#define PACKET3_WAIT_REG_MEM__FUNCTION__LESS_THAN_REF_VALUE 1
+#define PACKET3_WAIT_REG_MEM__FUNCTION__LESS_THAN_EQUAL_TO_THE_REF_VALUE 2
+#define PACKET3_WAIT_REG_MEM__FUNCTION__EQUAL_TO_THE_REFERENCE_VALUE 3
+#define PACKET3_WAIT_REG_MEM__FUNCTION__NOT_EQUAL_REFERENCE_VALUE 4
+#define PACKET3_WAIT_REG_MEM__FUNCTION__GREATER_THAN_OR_EQUAL_REFERENCE_VALUE 5
+#define PACKET3_WAIT_REG_MEM__FUNCTION__GREATER_THAN_REFERENCE_VALUE 6
/* 0 - always
* 1 - <
* 2 - <=
@@ -114,117 +166,222 @@
* 5 - >=
* 6 - >
*/
-#define WAIT_REG_MEM_MEM_SPACE(x) (((x) & 0x3) << 4)
+#define PACKET3_WAIT_REG_MEM__MEM_SPACE(x) ((((unsigned)(x)) & 0x3) << 4)
+#define PACKET3_WAIT_REG_MEM__MEM_SPACE__REGISTER_SPACE 0
+#define PACKET3_WAIT_REG_MEM__MEM_SPACE__MEMORY_SPACE 1
/* 0 - reg
* 1 - mem
*/
-#define WAIT_REG_MEM_OPERATION(x) (((x) & 0x3) << 6)
+#define PACKET3_WAIT_REG_MEM__OPERATION(x) ((((unsigned)(x)) & 0x3) << 6)
+#define PACKET3_WAIT_REG_MEM__OPERATION__WAIT_REG_MEM 0
+#define PACKET3_WAIT_REG_MEM__OPERATION__WR_WAIT_WR_REG 1
+#define PACKET3_WAIT_REG_MEM__OPERATION__WAIT_MEM_PREEMPTABLE 3
/* 0 - wait_reg_mem
* 1 - wr_wait_wr_reg
+ * 2 - reserved
+ * 3 - wait_mem_preemptable
*/
-#define WAIT_REG_MEM_MODE(x) (((x) & 0x3) << 10)
+#define PACKET3_WAIT_REG_MEM__MODE(x) ((((unsigned)(x)) & 0x3) << 10)
+#define PACKET3_WAIT_REG_MEM__MODE__LOCAL_XCD 0
+#define PACKET3_WAIT_REG_MEM__MODE__REMOTE_OR_LOCAL_AID 1
+#define PACKET3_WAIT_REG_MEM__MODE__REMOTE_XCD 2
+#define PACKET3_WAIT_REG_MEM__MODE__REMOTE_MID 3
/* 0 - local xcd
* 1 - remote/local aid
* 2 - remote xcd
* 3 - remote mid
*/
-#define WAIT_REG_MEM_MID_DIE_ID(x) (((x) & 0x3) << 12)
-#define WAIT_REG_MEM_XCD_DIE_ID(x) (((x) & 0xf) << 14)
-#define WAIT_REG_MEM_MES_INTR_PIPE(x) (((x) & 0x3) << 22)
-#define WAIT_REG_MEM_MES_ACTION(x) (((x) & 0x1) << 24)
-#define WAIT_REG_MEM_TEMPORAL(x) (((x) & 0x3) << 25)
+#define PACKET3_WAIT_REG_MEM__MID_DIE_ID(x) ((((unsigned)(x)) & 0x3) << 12)
+#define PACKET3_WAIT_REG_MEM__XCD_DIE_ID(x) ((((unsigned)(x)) & 0xf) << 14)
+#define PACKET3_WAIT_REG_MEM__MES_INTR_PIPE(x) ((((unsigned)(x)) & 0x3) << 22)
+#define PACKET3_WAIT_REG_MEM__MES_ACTION(x) ((((unsigned)(x)) & 0x1) << 24)
+#define PACKET3_WAIT_REG_MEM__TEMPORAL(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_WAIT_REG_MEM__TEMPORAL__RT 0
+#define PACKET3_WAIT_REG_MEM__TEMPORAL__NT 1
+#define PACKET3_WAIT_REG_MEM__TEMPORAL__HT 2
+#define PACKET3_WAIT_REG_MEM__TEMPORAL__LU 3
/* 0 - rt
* 1 - nt
* 2 - ht
* 3 - lu
*/
+#define PACKET3_WAIT_REG_MEM__MEM_POLL_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_WAIT_REG_MEM__REG_POLL_ADDR(x) ((unsigned)(x))
+#define PACKET3_WAIT_REG_MEM__REG_WRITE_ADDR1(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_WAIT_REG_MEM__MEM_POLL_ADDR_HI(x) ((unsigned)(x))
+#define PACKET3_WAIT_REG_MEM__REG_POLL_ADDR_HI(x) ((((unsigned)(x)) & 0x3FFF) << 0)
+#define PACKET3_WAIT_REG_MEM__REG_WRITE_ADDR2(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_WAIT_REG_MEM__REFERENCE(x) ((unsigned)(x))
+#define PACKET3_WAIT_REG_MEM__MASK(x) ((unsigned)(x))
+#define PACKET3_WAIT_REG_MEM__POLL_INTERVAL(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_WAIT_REG_MEM__OPTIMIZE_ACE_OFFLOAD_MODE(x) ((((unsigned)(x)) & 0x1) << 31)
#define PACKET3_INDIRECT_BUFFER 0x3F
-#define INDIRECT_BUFFER_VALID (1 << 23)
-#define INDIRECT_BUFFER_TEMPORAL(x) (x) << 28)
- /* 0 - rt
- * 1 - nt
- * 2 - ht
- * 3 - lu
- */
+#define PACKET3_INDIRECT_BUFFER__IB_BASE_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_INDIRECT_BUFFER__IB_BASE_HI(x) ((unsigned)(x))
+#define PACKET3_INDIRECT_BUFFER__IB_SIZE(x) ((((unsigned)(x)) & 0xFFFFF) << 0)
+#define PACKET3_INDIRECT_BUFFER__CHAIN(x) ((((unsigned)(x)) & 0x1) << 20)
+#define PACKET3_INDIRECT_BUFFER__OFFLOAD_POLLING(x) ((((unsigned)(x)) & 0x1) << 21)
+#define PACKET3_INDIRECT_BUFFER__VALID(x) ((((unsigned)(x)) & 0x1) << 23)
+#define PACKET3_INDIRECT_BUFFER__VMID(x) ((((unsigned)(x)) & 0xF) << 24)
+#define PACKET3_INDIRECT_BUFFER__TEMPORAL(x) ((((unsigned)(x)) & 0x3) << 28)
+#define PACKET3_INDIRECT_BUFFER__INHERIT_VMID(x) ((((unsigned)(x)) & 0x1) << 30)
+#define PACKET3_INDIRECT_BUFFER__PRIV(x) ((((unsigned)(x)) & 0x1) << 31)
+#define PACKET3_INDIRECT_BUFFER__TEMPORAL__RT 0
+#define PACKET3_INDIRECT_BUFFER__TEMPORAL__NT 1
+#define PACKET3_INDIRECT_BUFFER__TEMPORAL__HT 2
+#define PACKET3_INDIRECT_BUFFER__TEMPORAL__LU 3
#define PACKET3_COND_INDIRECT_BUFFER 0x3F
#define PACKET3_COPY_DATA 0x40
-#define COPY_DATA_SRC_SEL(x) (((x) & 0xf) << 0)
-#define COPY_DATA_DST_SEL(x) (((x) & 0xf) << 8)
-#define COPY_DATA_SRC_SCOPE(x) (((x) & 0x3) << 4)
-#define COPY_DATA_DST_SCOPE(x) (((x) & 0x3) << 27)
-#define COPY_DATA_MODE(x) (((x) & 0x3) << 6)
- /* 0 - local xcd
- * 1 - remote/local aid
- * 2 - remote xcd
- * 3 - remote mid
- */
-#define COPY_DATA_SRC_TEMPORAL(x) (((x) & 0x3) << 13)
-#define COPY_DATA_DST_TEMPORAL(x) (((x) & 0x3) << 25)
- /* 0 - rt
- * 1 - nt
- * 2 - ht
- * 3 - lu
- */
-#define COPY_DATA_COUNT_SEL (1 << 16)
-#define COPY_DATA_SRC_DST_REMOTE_MODE(x) (((x)) & 0x1 << 16)
- /* 0 - src remote
- * 1 - dst remote
- */
-#define COPY_DATA_MID_DIE_ID(x) (((x) & 0x3) << 18)
-#define COPY_DATA_XCD_DIE_ID(x) (((x) & 0xf) << 21)
-#define COPY_DATA_PQ_EXE_STATUS (1 << 27)
+#define PACKET3_COPY_DATA__SRC_SEL(x) ((((unsigned)(x)) & 0xF) << 0)
+#define PACKET3_COPY_DATA__SRC_SCOPE(x) ((((unsigned)(x)) & 0x3) << 4)
+#define PACKET3_COPY_DATA__MODE(x) ((((unsigned)(x)) & 0x3) << 6)
+#define PACKET3_COPY_DATA__DST_SEL(x) ((((unsigned)(x)) & 0xF) << 8)
+#define PACKET3_COPY_DATA__SRC_TEMPORAL(x) ((((unsigned)(x)) & 0x3) << 13)
+#define PACKET3_COPY_DATA__COUNT_SEL(x) ((((unsigned)(x)) & 0x1) << 16)
+#define PACKET3_COPY_DATA__SRC_DST_REMOTE_MODE(x) ((((unsigned)(x)) & 0x1) << 17)
+#define PACKET3_COPY_DATA__MID_DIE_ID(x) ((((unsigned)(x)) & 0x3) << 18)
+#define PACKET3_COPY_DATA__WR_CONFIRM(x) ((((unsigned)(x)) & 0x1) << 20)
+#define PACKET3_COPY_DATA__XCD_DIE_ID(x) ((((unsigned)(x)) & 0xF) << 21)
+#define PACKET3_COPY_DATA__DST_TEMPORAL(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_COPY_DATA__DST_SCOPE(x) ((((unsigned)(x)) & 0x3) << 27)
+#define PACKET3_COPY_DATA__PQ_EXE_STATUS(x) ((((unsigned)(x)) & 0x1) << 29)
+#define PACKET3_COPY_DATA__SRC_REG_OFFSET_LO(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__SRC_32B_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_COPY_DATA__SRC_64B_ADDR_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3)
+#define PACKET3_COPY_DATA__IMM_DATA(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__SRC_REG_OFFSET_HI(x) ((((unsigned)(x)) & 0x3FFF) << 0)
+#define PACKET3_COPY_DATA__SRC_MEMTC_ADDR_HI(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__SRC_IMM_DATA(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__DST_REG_OFFSET_LO(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__DST_32B_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_COPY_DATA__DST_64B_ADDR_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3)
+#define PACKET3_COPY_DATA__DST_REG_OFFSET_HI(x) ((((unsigned)(x)) & 0x3FFF) << 0)
+#define PACKET3_COPY_DATA__DST_ADDR_HI(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__SRC_SEL__MEM_MAPPED_REGISTER 0
+#define PACKET3_COPY_DATA__SRC_SEL__TC_L2_OBSOLETE 1
+#define PACKET3_COPY_DATA__SRC_SEL__TC_L2 2
+#define PACKET3_COPY_DATA__SRC_SEL__PERFCOUNTERS 4
+#define PACKET3_COPY_DATA__SRC_SEL__IMMEDIATE_DATA 5
+#define PACKET3_COPY_DATA__SRC_SEL__ATOMIC_RETURN_DATA 6
+#define PACKET3_COPY_DATA__SRC_SEL__GPU_CLOCK_COUNT 9
+#define PACKET3_COPY_DATA__SRC_SEL__SYSTEM_CLOCK_COUNT 10
+#define PACKET3_COPY_DATA__SRC_SCOPE__CU 0
+#define PACKET3_COPY_DATA__SRC_SCOPE__SE 1
+#define PACKET3_COPY_DATA__SRC_SCOPE__DEVICE 2
+#define PACKET3_COPY_DATA__SRC_SCOPE__SYSTEM 3
+#define PACKET3_COPY_DATA__MODE__LOCAL_XCD 0
+#define PACKET3_COPY_DATA__MODE__REMOTE_OR_LOCAL_AID 1
+#define PACKET3_COPY_DATA__MODE__REMOTE_XCD 2
+#define PACKET3_COPY_DATA__MODE__REMOTE_MID 3
+#define PACKET3_COPY_DATA__DST_SEL__MEM_MAPPED_REGISTER 0
+#define PACKET3_COPY_DATA__DST_SEL__TC_L2 2
+#define PACKET3_COPY_DATA__DST_SEL__PERFCOUNTERS 4
+#define PACKET3_COPY_DATA__DST_SEL__TC_L2_OBSOLETE 5
+#define PACKET3_COPY_DATA__DST_SEL__MEM_MAPPED_REG_DC 6
+#define PACKET3_COPY_DATA__SRC_TEMPORAL__RT 0
+#define PACKET3_COPY_DATA__SRC_TEMPORAL__NT 1
+#define PACKET3_COPY_DATA__SRC_TEMPORAL__HT 2
+#define PACKET3_COPY_DATA__SRC_TEMPORAL__LU 3
+#define PACKET3_COPY_DATA__COUNT_SEL__32_BITS_OF_DATA 0
+#define PACKET3_COPY_DATA__COUNT_SEL__64_BITS_OF_DATA 1
+#define PACKET3_COPY_DATA__SRC_DST_REMOTE_MODE__SRC_IS_REMOTE 0
+#define PACKET3_COPY_DATA__SRC_DST_REMOTE_MODE__DST_IS_REMOTE 1
+#define PACKET3_COPY_DATA__WR_CONFIRM__DO_NOT_WAIT_FOR_CONFIRMATION 0
+#define PACKET3_COPY_DATA__WR_CONFIRM__WAIT_FOR_CONFIRMATION 1
+#define PACKET3_COPY_DATA__DST_TEMPORAL__RT 0
+#define PACKET3_COPY_DATA__DST_TEMPORAL__NT 1
+#define PACKET3_COPY_DATA__DST_TEMPORAL__HT 2
+#define PACKET3_COPY_DATA__DST_TEMPORAL__LU 3
+#define PACKET3_COPY_DATA__DST_SCOPE__CU 0
+#define PACKET3_COPY_DATA__DST_SCOPE__SE 1
+#define PACKET3_COPY_DATA__DST_SCOPE__DEVICE 2
+#define PACKET3_COPY_DATA__DST_SCOPE__SYSTEM 3
+#define PACKET3_COPY_DATA__PQ_EXE_STATUS__DEFAULT 0
+#define PACKET3_COPY_DATA__PQ_EXE_STATUS__PHASE_UPDATE 1
#define PACKET3_PFP_SYNC_ME 0x42
#define PACKET3_COND_WRITE 0x45
#define PACKET3_EVENT_WRITE 0x46
-#define EVENT_TYPE(x) ((x) << 0)
-#define EVENT_INDEX(x) ((x) << 8)
- /* 0 - any non-TS event
- * 1 - ZPASS_DONE, PIXEL_PIPE_STAT_*
- * 2 - SAMPLE_PIPELINESTAT
- * 3 - SAMPLE_STREAMOUTSTAT*
- * 4 - *S_PARTIAL_FLUSH
- */
+#define PACKET3_EVENT_WRITE__EVENT_TYPE(x) ((((unsigned)(x)) & 0x3F) << 0)
+#define PACKET3_EVENT_WRITE__EVENT_INDEX(x) ((((unsigned)(x)) & 0xF) << 8)
+#define PACKET3_EVENT_WRITE__OFFLOAD_ENABLE(x) ((((unsigned)(x)) & 0x1) << 31)
+#define PACKET3_EVENT_WRITE__ADDRESS_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3)
+#define PACKET3_EVENT_WRITE__ADDRESS_HI(x) ((unsigned)(x))
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__OTHER 0
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_PIPELINESTAT 2
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__CS_PARTIAL_FLUSH 4
+#define PACKET3_EVENT_WRITE_EOP 0x47
+#define PACKET3_EVENT_WRITE_EOS 0x48
#define PACKET3_RELEASE_MEM 0x49
-#define PACKET3_RELEASE_MEM_EVENT_TYPE(x) ((x) << 0)
-#define PACKET3_RELEASE_MEM_EVENT_INDEX(x) ((x) << 8)
-#define PACKET3_RELEASE_MEM_GCR_GL2_SCOPE(x) ((x) << 12)
-#define PACKET3_RELEASE_MEM_GCR_GLV_INV (1 << 14)
-#define PACKET3_RELEASE_MEM_GCR_GL2_US (1 << 16)
-#define PACKET3_RELEASE_MEM_GCR_GL2_RANGE(x) ((x) << 17)
-#define PACKET3_RELEASE_MEM_GCR_GL2_DISCARD (1 << 19)
-#define PACKET3_RELEASE_MEM_GCR_GL2_INV (1 << 20)
-#define PACKET3_RELEASE_MEM_GCR_GL2_WB (1 << 21)
-#define PACKET3_RELEASE_MEM_GCR_SEQ(x) ((x) << 22)
-#define PACKET3_RELEASE_MEM_GCR_GLV_WB (1 << 24)
-#define PACKET3_RELEASE_MEM_TEMPORAL(x) ((x) << 25)
+#define PACKET3_RELEASE_MEM__EVENT_TYPE(x) ((((unsigned)(x)) & 0x3F) << 0)
+#define PACKET3_RELEASE_MEM__WAIT_SYNC(x) ((((unsigned)(x)) & 0x1) << 7)
+#define PACKET3_RELEASE_MEM__EVENT_INDEX(x) ((((unsigned)(x)) & 0xF) << 8)
+#define PACKET3_RELEASE_MEM__EVENT_INDEX__END_OF_PIPE 5
+#define PACKET3_RELEASE_MEM__EVENT_INDEX__SHADER_DONE 6
+#define PACKET3_RELEASE_MEM__GCR_CNTL(x) ((((unsigned)(x)) & 0x1FFF) << 12)
+#define PACKET3_RELEASE_MEM__GCR_GL2_SCOPE(x) ((x) << 12)
+#define PACKET3_RELEASE_MEM__GCR_GLV_INV (1 << 14)
+#define PACKET3_RELEASE_MEM__GCR_GL2_US (1 << 16)
+#define PACKET3_RELEASE_MEM__GCR_GL2_RANGE(x) ((x) << 17)
+#define PACKET3_RELEASE_MEM__GCR_GL2_DISCARD (1 << 19)
+#define PACKET3_RELEASE_MEM__GCR_GL2_INV (1 << 20)
+#define PACKET3_RELEASE_MEM__GCR_GL2_WB (1 << 21)
+#define PACKET3_RELEASE_MEM__GCR_SEQ(x) ((x) << 22)
+#define PACKET3_RELEASE_MEM__GCR_GLV_WB (1 << 24)
+#define PACKET3_RELEASE_MEM__TEMPORAL(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_RELEASE_MEM__TEMPORAL__RT 0
+#define PACKET3_RELEASE_MEM__TEMPORAL__NT 1
+#define PACKET3_RELEASE_MEM__TEMPORAL__HT 2
+#define PACKET3_RELEASE_MEM__TEMPORAL__LU 3
/* 0 - temporal__release_mem__rt
* 1 - temporal__release_mem__nt
* 2 - temporal__release_mem__ht
* 3 - temporal__release_mem__lu
*/
-#define PACKET3_RELEASE_MEM_PQ_EXE_STATUS (1 << 28)
-#define PACKET3_RELEASE_MEM_GCR_GLK_INV (1 << 30)
+#define PACKET3_RELEASE_MEM__PQ_EXE_STATUS(x) ((((unsigned)(x)) & 0x1) << 28)
+#define PACKET3_RELEASE_MEM__PQ_EXE_STATUS__DEFAULT 0
+#define PACKET3_RELEASE_MEM__PQ_EXE_STATUS__PHASE_UPDATE 1
+#define PACKET3_RELEASE_MEM__GCR_GLK_INV (1 << 30)
-#define PACKET3_RELEASE_MEM_DST_SEL(x) ((x) << 16)
- /* 0 - memory controller
- * 1 - TC/L2
- * 2 - register
- */
-#define PACKET3_RELEASE_MEM_MES_INTR_PIPE(x) ((x) << 20)
-#define PACKET3_RELEASE_MEM_MES_ACTION_ID(x) ((x) << 22)
-#define PACKET3_RELEASE_MEM_INT_SEL(x) ((x) << 24)
- /* 0 - none
- * 1 - interrupt only (DATA_SEL = 0)
- * 2 - interrupt when data write is confirmed
- */
-#define PACKET3_RELEASE_MEM_ADD_DOOREBLL_OFFSET(x) (1 << 28)
-#define PACKET3_RELEASE_MEM_DATA_SEL(x) ((x) << 29)
+#define PACKET3_RELEASE_MEM__DST_SEL(x) ((((unsigned)(x)) & 0x3) << 16)
+#define PACKET3_RELEASE_MEM__DST_SEL__MEMORY_CONTROLLER 0
+#define PACKET3_RELEASE_MEM__DST_SEL__TC_L2 1
+#define PACKET3_RELEASE_MEM__DST_SEL__QUQUE_WRITE_POINTER_REGISTER 2
+#define PACKET3_RELEASE_MEM__DST_SEL__QUQUE_WRITE_POINTER_POLL_MASK_BIT 3
+#define PACKET3_RELEASE_MEM__MES_INTR_PIPE(x) ((((unsigned)(x)) & 0x3) << 20)
+#define PACKET3_RELEASE_MEM__MES_ACTION_ID(x) ((((unsigned)(x)) & 0x3) << 22)
+#define PACKET3_RELEASE_MEM__MES_ACTION_ID__NO_MES_NOTIFICATION 0
+#define PACKET3_RELEASE_MEM__MES_ACTION_ID__INTERRUPT_AND_FENCE 1
+#define PACKET3_RELEASE_MEM__MES_ACTION_ID__INTERRUPT_NO_FENCE_THEN_ADDRESS_PAYLOAD 2
+#define PACKET3_RELEASE_MEM__MES_ACTION_ID__INTERRUPT_AND_ADDRESS_PAYLOAD 3
+#define PACKET3_RELEASE_MEM__INT_SEL(x) ((((unsigned)(x)) & 0x7) << 24)
+#define PACKET3_RELEASE_MEM__INT_SEL__NONE 0
+#define PACKET3_RELEASE_MEM__INT_SEL__SEND_INTERRUPT_ONLY 1
+#define PACKET3_RELEASE_MEM__INT_SEL__SEND_INTERRUPT_AFTER_WRITE_CONFIRM 2
+#define PACKET3_RELEASE_MEM__INT_SEL__SEND_DATA_AND_WRITE_CONFIRM 3
+#define PACKET3_RELEASE_MEM__INT_SEL__UNCONDITIONALLY_SEND_INT_CTXID 4
+#define PACKET3_RELEASE_MEM__INT_SEL__UNCONDITIONALLY_SEND_INT_CTXID_BASED_ON_32_BIT_COMPARE 5
+#define PACKET3_RELEASE_MEM__INT_SEL__UNCONDITIONALLY_SEND_INT_CTXID_BASED_ON_64_BIT_COMPARE 6
+#define PACKET3_RELEASE_MEM__ADD_DOOREBLL_OFFSET(x) ((((unsigned)(x)) & 0x1) << 28)
+#define PACKET3_RELEASE_MEM__DATA_SEL(x) ((((unsigned)(x)) & 0x7) << 29)
+#define PACKET3_RELEASE_MEM__DATA_SEL__NONE 0
+#define PACKET3_RELEASE_MEM__DATA_SEL__SEND_32_BIT_LOW 1
+#define PACKET3_RELEASE_MEM__DATA_SEL__SEND_64_BIT_DATA 2
+#define PACKET3_RELEASE_MEM__DATA_SEL__SEND_GPU_CLOCK_COUNTER 3
+#define PACKET3_RELEASE_MEM__DATA_SEL__SEND_SYSTEM_CLOCK_COUNTER 4
/* 0 - discard
* 1 - send low 32bit data
* 2 - send 64bit data
* 3 - send 64bit GPU counter value
* 4 - send 64bit sys counter value
*/
+#define PACKET3_RELEASE_MEM__ADDRESS_LO_32B(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_RELEASE_MEM__ADDRESS_LO_64B(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3)
+#define PACKET3_RELEASE_MEM__ADDRESS_HI(x) ((unsigned)(x))
+#define PACKET3_RELEASE_MEM__DATA_LO(x) ((unsigned)(x))
+#define PACKET3_RELEASE_MEM__CMP_DATA_LO(x) ((unsigned)(x))
+#define PACKET3_RELEASE_MEM__DATA_HI(x) ((unsigned)(x))
+#define PACKET3_RELEASE_MEM__CMP_DATA_HI(x) ((unsigned)(x))
+#define PACKET3_RELEASE_MEM__INT_CTXID(x) ((unsigned)(x))
#define PACKET3_PREAMBLE_CNTL 0x4A
# define PACKET3_PREAMBLE_BEGIN_CLEAR_STATE (2 << 28)
@@ -370,11 +527,19 @@
#define PACKET3_SET_SH_REG 0x76
#define PACKET3_SET_SH_REG_START 0x00002c00
#define PACKET3_SET_SH_REG_END 0x00003000
+#define PACKET3_SET_SH_REG__REG_OFFSET(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_SET_SH_REG__VMID_SHIFT(x) ((((unsigned)(x)) & 0x1F) << 23)
+#define PACKET3_SET_SH_REG__INDEX(x) ((((unsigned)(x)) & 0xF) << 28)
+#define PACKET3_SET_SH_REG__REG_DATA(x) (((unsigned)(x)))
+#define PACKET3_SET_SH_REG__INDEX__DEFAULT 0
+#define PACKET3_SET_SH_REG__INDEX__INSERT_VMID 1
#define PACKET3_SET_SH_REG_OFFSET 0x77
#define PACKET3_SET_QUEUE_REG 0x78
#define PACKET3_SET_UCONFIG_REG 0x79
#define PACKET3_SET_UCONFIG_REG_START 0x0000c000
#define PACKET3_SET_UCONFIG_REG_END 0x0000c400
+#define PACKET3_SET_UCONFIG_REG__REG_OFFSET(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_SET_UCONFIG_REG__REG_DATA(x) (((unsigned)(x)))
#define PACKET3_SET_UCONFIG_REG_INDEX 0x7A
#define PACKET3_DISPATCH_DRAW_PREAMBLE 0x8C
#define PACKET3_DISPATCH_DRAW 0x8D
--
2.53.0
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [PATCH] drm/amdgpu: In GFX12.1 CU is same as WGP
2026-04-22 21:50 [PATCH] drm/amdgpu: update cp cmd pkt for gfx v12_1 Alex Deucher
@ 2026-04-22 21:50 ` Alex Deucher
2026-04-22 21:50 ` [PATCH] drm/amdgpu: Switch to gfx_v12_1_get_xccs_per_xcp Alex Deucher
1 sibling, 0 replies; 3+ messages in thread
From: Alex Deucher @ 2026-04-22 21:50 UTC (permalink / raw)
To: amd-gfx; +Cc: Harish Kasiviswanathan, Felix Kuehling, Alex Deucher
From: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
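In GFX12.1 a CU is the same as a WGP. Account for this with the current
IP discovery table by halving max_cu_per_sh when the GC info table
version is v1.3.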
v2: Move the change to gfx12 file
v3: Change only for the current version of ip discovery table
v4: Squash in build fixes
Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 25 +++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h | 2 ++
drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c | 10 ++++++++
3 files changed, 37 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 193091583847b..fef90a1d69441 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -3423,3 +3423,28 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
return 0;
}
+int amdgpu_discovery_get_gc_major_minor_version(struct amdgpu_device *adev,
+ uint16_t *major, uint16_t *minor)
+{
+ uint8_t *discovery_bin = adev->discovery.bin;
+ struct table_info *info;
+ union gc_info *gc_info;
+ u16 offset;
+
+ if (!discovery_bin)
+ return -EINVAL;
+ if (amdgpu_discovery_get_table_info(adev, &info, GC))
+ return -EINVAL;
+
+ offset = le16_to_cpu(info->offset);
+ if (!offset)
+ return -EINVAL;
+
+ gc_info = (union gc_info *)(discovery_bin + offset);
+
+ if (major)
+ *major = le16_to_cpu(gc_info->v1.header.version_major);
+ if (minor)
+ *minor = le16_to_cpu(gc_info->v1.header.version_minor);
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
index 0ff1a7923eedf..e0010f6a3eda5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
@@ -48,6 +48,8 @@ int amdgpu_discovery_get_nps_info(struct amdgpu_device *adev,
uint32_t *nps_type,
struct amdgpu_gmc_memrange *ranges,
int *range_cnt, bool refresh);
+int amdgpu_discovery_get_gc_major_minor_version(struct amdgpu_device *adev,
+ uint16_t *major, uint16_t *minor);
void amdgpu_discovery_dump(struct amdgpu_device *adev, struct drm_printer *p);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
index f4089ab108474..6872ce3f3ebbe 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
@@ -1142,6 +1142,7 @@ static int gfx_v12_1_rlc_backdoor_autoload_enable(struct amdgpu_device *adev)
static int gfx_v12_1_sw_init(struct amdgpu_ip_block *ip_block)
{
+ uint16_t major_ver, minor_ver;
int i, j, k, r, ring_id = 0;
unsigned num_compute_rings;
int xcc_id, num_xcc;
@@ -1152,6 +1153,15 @@ static int gfx_v12_1_sw_init(struct amdgpu_ip_block *ip_block)
adev->gfx.mec.num_mec = 1;
adev->gfx.mec.num_pipe_per_mec = 4;
adev->gfx.mec.num_queue_per_pipe = 8;
+
+ if (!amdgpu_discovery_get_gc_major_minor_version(
+ adev, &major_ver, &minor_ver)) {
+ if (major_ver == 1 && minor_ver == 3) {
+ adev->gfx.config.max_cu_per_sh /= 2;
+ dev_dbg(adev->dev, "Halving max_cu_per_sh for GC Discovery table v1:3 %d\n",
+ adev->gfx.config.max_cu_per_sh);
+ }
+ }
break;
default:
adev->gfx.mec.num_mec = 2;
--
2.53.0
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [PATCH] drm/amdgpu: Switch to gfx_v12_1_get_xccs_per_xcp
2026-04-22 21:50 [PATCH] drm/amdgpu: update cp cmd pkt for gfx v12_1 Alex Deucher
2026-04-22 21:50 ` [PATCH] drm/amdgpu: In GFX12.1 CU is same as WGP Alex Deucher
@ 2026-04-22 21:50 ` Alex Deucher
1 sibling, 0 replies; 3+ messages in thread
From: Alex Deucher @ 2026-04-22 21:50 UTC (permalink / raw)
To: amd-gfx; +Cc: Hawking Zhang, Le Ma, Alex Deucher
From: Hawking Zhang <Hawking.Zhang@amd.com>
Use the gfx v12_1 callback to query the number of XCCs
per XCP.
Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Le Ma <le.ma@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c | 13 +++++++++++--
1 file changed, 11 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
index 6872ce3f3ebbe..5412a528f78be 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
@@ -718,10 +718,19 @@ static void gfx_v12_1_select_me_pipe_q(struct amdgpu_device *adev,
soc_v1_0_grbm_select(adev, me, pipe, q, vm, GET_INST(GC, xcc_id));
}
+#define regGFX_IMU_PARTITION_SWITCH 0x5f8c
+#define regGFX_IMU_PARTITION_SWITCH_BASE_IDX 1
+#define GFX_IMU_PARTITION_SWITCH__TOTAL_XCCS_IN_XCP__SHIFT 0x2
+#define GFX_IMU_PARTITION_SWITCH__TOTAL_XCCS_IN_XCP_MASK 0x0000003CL
+
static int gfx_v12_1_get_xccs_per_xcp(struct amdgpu_device *adev)
{
- /* Fill this in when the interface is ready */
- return 1;
+ u32 reg_data;
+
+ /* the register data is expected to be the same on all instances */
+ reg_data = RREG32_SOC15(GC, GET_INST(GC, 0), regGFX_IMU_PARTITION_SWITCH);
+
+ return REG_GET_FIELD(reg_data, GFX_IMU_PARTITION_SWITCH, TOTAL_XCCS_IN_XCP);
}
static int gfx_v12_1_ih_to_xcc_inst(struct amdgpu_device *adev, int ih_node)
--
2.53.0
^ permalink raw reply related [flat|nested] 3+ messages in thread
end of thread, other threads:[~2026-04-22 21:51 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2026-04-22 21:50 [PATCH] drm/amdgpu: update cp cmd pkt for gfx v12_1 Alex Deucher
2026-04-22 21:50 ` [PATCH] drm/amdgpu: In GFX12.1 CU is same as WGP Alex Deucher
2026-04-22 21:50 ` [PATCH] drm/amdgpu: Switch to gfx_v12_1_get_xccs_per_xcp Alex Deucher
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.