AMD-GFX Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/amdkfd: gfx12.1 trap handler instruction fixup for VOP3PX
@ 2026-01-22 15:04 Jay Cornwall
  2026-01-28 10:54 ` Lancelot SIX
  0 siblings, 1 reply; 2+ messages in thread
From: Jay Cornwall @ 2026-01-22 15:04 UTC (permalink / raw)
  To: amd-gfx; +Cc: Jay Cornwall, Lancelot Six, Vladimir Indic, Shweta Khatri

A trap may occur in the middle of VOP3PX instruction co-issue.
The PC would be restored incorrectly if left unmodified.

Identify this case by examining the instruction opcode and
rewind the PC 8 bytes if it occurs.

Signed-off-by: Jay Cornwall <jay.cornwall@amd.com>
Cc: Lancelot Six <lancelot.six@amd.com>
Cc: Vladimir Indic <vladimir.indic@amd.com>
Cc: Shweta Khatri <shweta.khatri@amd.com>
---
 .../gpu/drm/amd/amdkfd/cwsr_trap_handler.h    | 197 +++++++++---------
 .../amd/amdkfd/cwsr_trap_handler_gfx12.asm    |  25 ++-
 2 files changed, 121 insertions(+), 101 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
index 39bdc98b8b6d..54fa76f374c9 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
@@ -4587,14 +4587,14 @@ static const uint32_t cwsr_trap_gfx9_5_0_hex[] = {
 };
 
 static const uint32_t cwsr_trap_gfx12_1_0_hex[] = {
-	0xbfa00001, 0xbfa003ac,
+	0xbfa00001, 0xbfa003be,
 	0xb0804009, 0xb8f8f804,
 	0x9178ff78, 0x00008c00,
 	0xb8fbf811, 0x8b6eff78,
 	0x00004000, 0xbfa10008,
 	0x8b6eff7b, 0x00000080,
 	0xbfa20018, 0x8b6ea07b,
-	0xbfa200d1, 0xbf830010,
+	0xbfa200da, 0xbf830010,
 	0xb8fbf811, 0xbfa0fffb,
 	0x8b6eff7b, 0x00000bd0,
 	0xbfa20010, 0xb8eef812,
@@ -4605,7 +4605,7 @@ static const uint32_t cwsr_trap_gfx12_1_0_hex[] = {
 	0xf0000000, 0xbfa20005,
 	0x8b6fff6f, 0x00000200,
 	0xbfa20002, 0x8b6ea07b,
-	0xbfa200bb, 0x9177ff77,
+	0xbfa200c4, 0x9177ff77,
 	0x007fc000, 0xb8fa04a1,
 	0x847a967a, 0x8c777a77,
 	0xb8fa0421, 0x847a957a,
@@ -4632,43 +4632,46 @@ static const uint32_t cwsr_trap_gfx12_1_0_hex[] = {
 	0xbfa00002, 0x806c846c,
 	0x826d806d, 0x8b6dff6d,
 	0x01ffffff, 0xb8fbf811,
-	0xbf0d847b, 0xbfa20078,
+	0xbf0d847b, 0xbfa20081,
 	0xf4003eb6, 0xf8000000,
 	0xbfc70000, 0xf4003bb6,
 	0xf8000008, 0x8b76ff7a,
 	0x80000000, 0xbfa20027,
 	0x9376ff7a, 0x00060019,
 	0x81f9a376, 0xbf0b8179,
-	0xbfa20068, 0x81f9ac76,
-	0xbf0b8179, 0xbfa20062,
+	0xbfa2006e, 0x81f9ac76,
+	0xbf0b8179, 0xbfa20068,
 	0x81f9b776, 0xbf0b8179,
-	0xbfa2005f, 0x8b76ff7a,
+	0xbfa20065, 0x8b76ff7a,
 	0x000001ff, 0xbf06ff76,
-	0x000000fe, 0xbfa2005d,
+	0x000000fe, 0xbfa20063,
 	0xbf06ff76, 0x000000ff,
-	0xbfa20057, 0xbf06ff76,
-	0x000000fa, 0xbfa20054,
+	0xbfa2005d, 0xbf06ff76,
+	0x000000fa, 0xbfa2005a,
 	0x81f9ff76, 0x000000e9,
-	0xbf0b8179, 0xbfa20050,
+	0xbf0b8179, 0xbfa20056,
 	0x8b76ff7b, 0xffff0000,
 	0xbf06ff76, 0xbf860000,
-	0xbfa10051, 0x9376ff7b,
+	0xbfa1005a, 0x9376ff7b,
 	0x0002000e, 0x8b79ff7b,
 	0x00003f00, 0x85798679,
 	0x8c767976, 0xb9763b01,
-	0xbfa00049, 0x8b76ff7a,
+	0xbfa00052, 0x8b76ff7a,
 	0xfc000000, 0xbf06ff76,
-	0xd4000000, 0xbfa20013,
+	0xd4000000, 0xbfa20019,
 	0xbf06ff76, 0xc8000000,
-	0xbfa20027, 0x8b76ff7a,
+	0xbfa2002d, 0x8b76ff7a,
 	0xff000000, 0xbf06ff76,
-	0xcf000000, 0xbfa20039,
+	0xcf000000, 0xbfa2003f,
 	0x8b79ff7a, 0xffff0000,
+	0xbf06ff79, 0xcc330000,
+	0xbfa2003d, 0xbf06ff79,
+	0xcc880000, 0xbfa2003a,
 	0xbf06ff79, 0xcc350000,
-	0xbfa20037, 0xbf06ff79,
-	0xcc3a0000, 0xbfa20034,
+	0xbfa2003a, 0xbf06ff79,
+	0xcc3a0000, 0xbfa20037,
 	0xbf06ff76, 0xcc000000,
-	0xbfa10031, 0x8b76ff7b,
+	0xbfa10034, 0x8b76ff7b,
 	0x000001ff, 0xbf06ff76,
 	0x000000ff, 0xbfa20029,
 	0xbf06ff76, 0x000000fa,
@@ -4691,86 +4694,92 @@ static const uint32_t cwsr_trap_gfx12_1_0_hex[] = {
 	0x000001ff, 0xbf06ff76,
 	0x000000ff, 0xbfa20003,
 	0xbfc70000, 0xbefb006e,
-	0xbfa0ffad, 0xbfc70000,
-	0xbefb006f, 0xbfa0ffaa,
-	0xbfc70000, 0x857a9677,
-	0xb97a04a1, 0x857a9577,
-	0xb97a0421, 0x857a8e77,
-	0xb97a3021, 0x8bfe7e7e,
-	0x8bea6a6a, 0x85788978,
-	0xb9783244, 0xbe804a6c,
-	0xb8faf802, 0xbf0d987a,
-	0xbfa10001, 0xbfb00000,
-	0x8b6dff6d, 0x01ffffff,
-	0xbefa0080, 0xb97a0151,
-	0x9177ff77, 0x007fc000,
-	0xb8fa04a1, 0x847a967a,
-	0x8c777a77, 0xb8fa0421,
-	0x847a957a, 0x8c777a77,
-	0xb8fa3021, 0x847a8e7a,
-	0x8c777a77, 0xb980f821,
-	0x00000000, 0xbf0d847b,
-	0xbfa20078, 0xf4003eb6,
-	0xf8000000, 0xbfc70000,
-	0xf4003bb6, 0xf8000008,
-	0x8b76ff7a, 0x80000000,
-	0xbfa20027, 0x9376ff7a,
-	0x00060019, 0x81f9a376,
+	0xbfa0ffa7, 0xbfc70000,
+	0xbefb006f, 0xbfa0ffa4,
+	0x80ec886c, 0x82ed806d,
+	0xbfa0fff7, 0xbfc70000,
+	0x857a9677, 0xb97a04a1,
+	0x857a9577, 0xb97a0421,
+	0x857a8e77, 0xb97a3021,
+	0x8bfe7e7e, 0x8bea6a6a,
+	0x85788978, 0xb9783244,
+	0xbe804a6c, 0xb8faf802,
+	0xbf0d987a, 0xbfa10001,
+	0xbfb00000, 0x8b6dff6d,
+	0x01ffffff, 0xbefa0080,
+	0xb97a0151, 0x9177ff77,
+	0x007fc000, 0xb8fa04a1,
+	0x847a967a, 0x8c777a77,
+	0xb8fa0421, 0x847a957a,
+	0x8c777a77, 0xb8fa3021,
+	0x847a8e7a, 0x8c777a77,
+	0xb980f821, 0x00000000,
+	0xbf0d847b, 0xbfa20081,
+	0xf4003eb6, 0xf8000000,
+	0xbfc70000, 0xf4003bb6,
+	0xf8000008, 0x8b76ff7a,
+	0x80000000, 0xbfa20027,
+	0x9376ff7a, 0x00060019,
+	0x81f9a376, 0xbf0b8179,
+	0xbfa2006e, 0x81f9ac76,
 	0xbf0b8179, 0xbfa20068,
-	0x81f9ac76, 0xbf0b8179,
-	0xbfa20062, 0x81f9b776,
-	0xbf0b8179, 0xbfa2005f,
-	0x8b76ff7a, 0x000001ff,
-	0xbf06ff76, 0x000000fe,
+	0x81f9b776, 0xbf0b8179,
+	0xbfa20065, 0x8b76ff7a,
+	0x000001ff, 0xbf06ff76,
+	0x000000fe, 0xbfa20063,
+	0xbf06ff76, 0x000000ff,
 	0xbfa2005d, 0xbf06ff76,
-	0x000000ff, 0xbfa20057,
+	0x000000fa, 0xbfa2005a,
+	0x81f9ff76, 0x000000e9,
+	0xbf0b8179, 0xbfa20056,
+	0x8b76ff7b, 0xffff0000,
+	0xbf06ff76, 0xbf860000,
+	0xbfa1005a, 0x9376ff7b,
+	0x0002000e, 0x8b79ff7b,
+	0x00003f00, 0x85798679,
+	0x8c767976, 0xb9763b01,
+	0xbfa00052, 0x8b76ff7a,
+	0xfc000000, 0xbf06ff76,
+	0xd4000000, 0xbfa20019,
+	0xbf06ff76, 0xc8000000,
+	0xbfa2002d, 0x8b76ff7a,
+	0xff000000, 0xbf06ff76,
+	0xcf000000, 0xbfa2003f,
+	0x8b79ff7a, 0xffff0000,
+	0xbf06ff79, 0xcc330000,
+	0xbfa2003d, 0xbf06ff79,
+	0xcc880000, 0xbfa2003a,
+	0xbf06ff79, 0xcc350000,
+	0xbfa2003a, 0xbf06ff79,
+	0xcc3a0000, 0xbfa20037,
+	0xbf06ff76, 0xcc000000,
+	0xbfa10034, 0x8b76ff7b,
+	0x000001ff, 0xbf06ff76,
+	0x000000ff, 0xbfa20029,
 	0xbf06ff76, 0x000000fa,
-	0xbfa20054, 0x81f9ff76,
-	0x000000e9, 0xbf0b8179,
-	0xbfa20050, 0x8b76ff7b,
-	0xffff0000, 0xbf06ff76,
-	0xbf860000, 0xbfa10051,
-	0x9376ff7b, 0x0002000e,
-	0x8b79ff7b, 0x00003f00,
-	0x85798679, 0x8c767976,
-	0xb9763b01, 0xbfa00049,
-	0x8b76ff7a, 0xfc000000,
-	0xbf06ff76, 0xd4000000,
-	0xbfa20013, 0xbf06ff76,
-	0xc8000000, 0xbfa20027,
-	0x8b76ff7a, 0xff000000,
-	0xbf06ff76, 0xcf000000,
-	0xbfa20039, 0x8b79ff7a,
-	0xffff0000, 0xbf06ff79,
-	0xcc350000, 0xbfa20037,
-	0xbf06ff79, 0xcc3a0000,
-	0xbfa20034, 0xbf06ff76,
-	0xcc000000, 0xbfa10031,
-	0x8b76ff7b, 0x000001ff,
-	0xbf06ff76, 0x000000ff,
-	0xbfa20029, 0xbf06ff76,
-	0x000000fa, 0xbfa20026,
-	0x81f6ff76, 0x000000e9,
-	0xbf0b8176, 0xbfa20022,
-	0x8b76ff7b, 0x0003fe00,
-	0xbf06ff76, 0x0001fe00,
-	0xbfa2001d, 0x8b76ff7b,
-	0x07fc0000, 0xbf06ff76,
-	0x03fc0000, 0xbfa20018,
-	0xbfa00014, 0x9376ff7a,
-	0x00040016, 0x81f68176,
-	0xbf0b8176, 0xbfa20012,
-	0x9376ff7a, 0x00050011,
+	0xbfa20026, 0x81f6ff76,
+	0x000000e9, 0xbf0b8176,
+	0xbfa20022, 0x8b76ff7b,
+	0x0003fe00, 0xbf06ff76,
+	0x0001fe00, 0xbfa2001d,
+	0x8b76ff7b, 0x07fc0000,
+	0xbf06ff76, 0x03fc0000,
+	0xbfa20018, 0xbfa00014,
+	0x9376ff7a, 0x00040016,
 	0x81f68176, 0xbf0b8176,
-	0xbfa2000d, 0x8b76ff7a,
-	0x000001ff, 0xbf06ff76,
-	0x000000ff, 0xbfa20008,
-	0x8b76ff7b, 0x000001ff,
+	0xbfa20012, 0x9376ff7a,
+	0x00050011, 0x81f68176,
+	0xbf0b8176, 0xbfa2000d,
+	0x8b76ff7a, 0x000001ff,
 	0xbf06ff76, 0x000000ff,
-	0xbfa20003, 0xbfc70000,
-	0xbefb006e, 0xbfa0ffad,
-	0xbfc70000, 0xbefb006f,
-	0xbfa0ffaa, 0xbfc70000,
+	0xbfa20008, 0x8b76ff7b,
+	0x000001ff, 0xbf06ff76,
+	0x000000ff, 0xbfa20003,
+	0xbfc70000, 0xbefb006e,
+	0xbfa0ffa7, 0xbfc70000,
+	0xbefb006f, 0xbfa0ffa4,
+	0x80ec886c, 0x82ed806d,
+	0xbfa0fff7, 0xbfc70000,
 	0xbeee007e, 0xbeef007f,
 	0xbefe0180, 0xbefe4d84,
 	0xbf8a0000, 0x8b7aff7f,
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm
index c33e7660d8f4..d38ff404277b 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm
@@ -37,6 +37,7 @@
 #define HAVE_CLUSTER_BARRIER (ASIC_FAMILY == CHIP_GC_12_0_3)
 #define CLUSTER_BARRIER_SERIALIZE_WORKAROUND (ASIC_FAMILY == CHIP_GC_12_0_3)
 #define RELAXED_SCHEDULING_IN_TRAP (ASIC_FAMILY == CHIP_GFX12)
+#define HAVE_INSTRUCTION_FIXUP (ASIC_FAMILY == CHIP_GC_12_0_3)
 
 #define SINGLE_STEP_MISSED_WORKAROUND 1	//workaround for lost TRAP_AFTER_INST exception when SAVECTX raised
 #define HAVE_VALU_SGPR_HAZARD (ASIC_FAMILY == CHIP_GFX12)
@@ -372,9 +373,9 @@ L_TRAP_CASE:
 L_EXIT_TRAP:
 	s_and_b32	ttmp1, ttmp1, ADDRESS_HI32_MASK
 
-#if HAVE_BANKED_VGPRS
+#if HAVE_INSTRUCTION_FIXUP
 	s_getreg_b32	s_save_excp_flag_priv, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV)
-	fixup_vgpr_bank_selection()
+	fixup_instruction()
 #endif
 
 #if HAVE_XNACK
@@ -415,8 +416,8 @@ L_HAVE_VGPRS:
 	save_and_clear_xnack_state_priv(s_save_tmp)
 #endif
 
-#if HAVE_BANKED_VGPRS
-	fixup_vgpr_bank_selection()
+#if HAVE_INSTRUCTION_FIXUP
+	fixup_instruction()
 #endif
 
 	/* inform SPI the readiness and wait for SPI's go signal */
@@ -1397,8 +1398,8 @@ L_BARRIER_RESTORE_LOOP:
 L_BARRIER_RESTORE_DONE:
 end
 
-#if HAVE_BANKED_VGPRS
-function fixup_vgpr_bank_selection
+#if HAVE_INSTRUCTION_FIXUP
+function fixup_instruction
 	// PC read may fault if memory violation has been asserted.
 	// In this case no further progress is expected so fixup is not needed.
 	s_bitcmp1_b32	s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_SHIFT
@@ -1477,8 +1478,13 @@ L_FIXUP_NOT_VOP12C:
 	s_cmp_eq_u32	ttmp10, 0xcf000000					// If 31:24 = 0xcf, this is VOPD3
 	s_cbranch_scc1	L_FIXUP_THREE_DWORD					// If VOPD3, 3 DWORD inst
 	// Not VOP1, VOP2, VOPC, VOP3, VOP3SD, VOPD, or VOPD3.
-	// Might be in VOP3P, but we must ensure we are not VOP3PX2
+	// Check if we are in the middle of VOP3PX.
 	s_and_b32	ttmp13, ttmp14, 0xffff0000				// Bits 31:16
+	s_cmp_eq_u32	ttmp13, 0xcc330000					// If 31:16 = 0xcc33, this is 8 bytes past VOP3PX
+	s_cbranch_scc1	L_FIXUP_VOP3PX_MIDDLE
+	s_cmp_eq_u32	ttmp13, 0xcc880000					// If 31:16 = 0xcc88, this is 8 bytes past VOP3PX
+	s_cbranch_scc1	L_FIXUP_VOP3PX_MIDDLE
+	// Might be in VOP3P, but we must ensure we are not VOP3PX2
 	s_cmp_eq_u32	ttmp13, 0xcc350000					// If 31:16 = 0xcc35, this is VOP3PX2
 	s_cbranch_scc1	L_FIXUP_DONE						// If VOP3PX2, no fixup needed
 	s_cmp_eq_u32	ttmp13, 0xcc3a0000					// If 31:16 = 0xcc3a, this is VOP3PX2
@@ -1539,6 +1545,11 @@ L_FIXUP_THREE_DWORD:
 	s_mov_b32	ttmp15, ttmp3						// Move possible S_SET_VGPR_MSB into ttmp15
 	s_branch	L_FIXUP_ONE_DWORD					// Go to common logic that checks if it is S_SET_VGPR_MSB
 
+L_FIXUP_VOP3PX_MIDDLE:
+	s_sub_co_u32	ttmp0, ttmp0, 8						// Rewind PC 8 bytes to beginning of instruction
+	s_sub_co_ci_u32	ttmp1, ttmp1, 0
+	s_branch	L_FIXUP_TWO_DWORD					// 2 DWORD inst (2nd half of a 4 DWORD inst)
+
 L_FIXUP_DONE:
 	s_wait_kmcnt	0							// Ensure load of ttmp2 and ttmp3 is done
 end
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH] drm/amdkfd: gfx12.1 trap handler instruction fixup for VOP3PX
  2026-01-22 15:04 [PATCH] drm/amdkfd: gfx12.1 trap handler instruction fixup for VOP3PX Jay Cornwall
@ 2026-01-28 10:54 ` Lancelot SIX
  0 siblings, 0 replies; 2+ messages in thread
From: Lancelot SIX @ 2026-01-28 10:54 UTC (permalink / raw)
  To: Jay Cornwall, amd-gfx; +Cc: Vladimir Indic, Shweta Khatri

Hi Jay,

On 22/01/2026 15:04, Jay Cornwall wrote:
> A trap may occur in the middle of VOP3PX instruction co-issue.
> The PC would be restored incorrectly if left unmodified.
> 
> Identify this case by examining the instruction opcode and
> rewind the PC 8 bytes if it occurs.
> 
> Signed-off-by: Jay Cornwall <jay.cornwall@amd.com>
> Cc: Lancelot Six <lancelot.six@amd.com>
> Cc: Vladimir Indic <vladimir.indic@amd.com>
> Cc: Shweta Khatri <shweta.khatri@amd.com>

This looks good to me, thanks.

Reviewed-by: Lancelot Six <lancelot.six@amd.com>
> ---
>   .../gpu/drm/amd/amdkfd/cwsr_trap_handler.h    | 197 +++++++++---------
>   .../amd/amdkfd/cwsr_trap_handler_gfx12.asm    |  25 ++-
>   2 files changed, 121 insertions(+), 101 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm
> index c33e7660d8f4..d38ff404277b 100644
> --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm
> +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm
> @@ -37,6 +37,7 @@
>   #define HAVE_CLUSTER_BARRIER (ASIC_FAMILY == CHIP_GC_12_0_3)
>   #define CLUSTER_BARRIER_SERIALIZE_WORKAROUND (ASIC_FAMILY == CHIP_GC_12_0_3)
>   #define RELAXED_SCHEDULING_IN_TRAP (ASIC_FAMILY == CHIP_GFX12)
> +#define HAVE_INSTRUCTION_FIXUP (ASIC_FAMILY == CHIP_GC_12_0_3)
>   
>   #define SINGLE_STEP_MISSED_WORKAROUND 1	//workaround for lost TRAP_AFTER_INST exception when SAVECTX raised
>   #define HAVE_VALU_SGPR_HAZARD (ASIC_FAMILY == CHIP_GFX12)
> @@ -372,9 +373,9 @@ L_TRAP_CASE:
>   L_EXIT_TRAP:
>   	s_and_b32	ttmp1, ttmp1, ADDRESS_HI32_MASK
>   
> -#if HAVE_BANKED_VGPRS
> +#if HAVE_INSTRUCTION_FIXUP
>   	s_getreg_b32	s_save_excp_flag_priv, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV)
> -	fixup_vgpr_bank_selection()
> +	fixup_instruction()
>   #endif
>   
>   #if HAVE_XNACK
> @@ -415,8 +416,8 @@ L_HAVE_VGPRS:
>   	save_and_clear_xnack_state_priv(s_save_tmp)
>   #endif
>   
> -#if HAVE_BANKED_VGPRS
> -	fixup_vgpr_bank_selection()
> +#if HAVE_INSTRUCTION_FIXUP
> +	fixup_instruction()
>   #endif
>   
>   	/* inform SPI the readiness and wait for SPI's go signal */
> @@ -1397,8 +1398,8 @@ L_BARRIER_RESTORE_LOOP:
>   L_BARRIER_RESTORE_DONE:
>   end
>   
> -#if HAVE_BANKED_VGPRS
> -function fixup_vgpr_bank_selection
> +#if HAVE_INSTRUCTION_FIXUP
> +function fixup_instruction
>   	// PC read may fault if memory violation has been asserted.
>   	// In this case no further progress is expected so fixup is not needed.
>   	s_bitcmp1_b32	s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_SHIFT
> @@ -1477,8 +1478,13 @@ L_FIXUP_NOT_VOP12C:
>   	s_cmp_eq_u32	ttmp10, 0xcf000000					// If 31:24 = 0xcf, this is VOPD3
>   	s_cbranch_scc1	L_FIXUP_THREE_DWORD					// If VOPD3, 3 DWORD inst
>   	// Not VOP1, VOP2, VOPC, VOP3, VOP3SD, VOPD, or VOPD3.
> -	// Might be in VOP3P, but we must ensure we are not VOP3PX2
> +	// Check if we are in the middle of VOP3PX.
>   	s_and_b32	ttmp13, ttmp14, 0xffff0000				// Bits 31:16
> +	s_cmp_eq_u32	ttmp13, 0xcc330000					// If 31:16 = 0xcc33, this is 8 bytes past VOP3PX
> +	s_cbranch_scc1	L_FIXUP_VOP3PX_MIDDLE
> +	s_cmp_eq_u32	ttmp13, 0xcc880000					// If 31:16 = 0xcc88, this is 8 bytes past VOP3PX
> +	s_cbranch_scc1	L_FIXUP_VOP3PX_MIDDLE
> +	// Might be in VOP3P, but we must ensure we are not VOP3PX2
>   	s_cmp_eq_u32	ttmp13, 0xcc350000					// If 31:16 = 0xcc35, this is VOP3PX2
>   	s_cbranch_scc1	L_FIXUP_DONE						// If VOP3PX2, no fixup needed
>   	s_cmp_eq_u32	ttmp13, 0xcc3a0000					// If 31:16 = 0xcc3a, this is VOP3PX2
> @@ -1539,6 +1545,11 @@ L_FIXUP_THREE_DWORD:
>   	s_mov_b32	ttmp15, ttmp3						// Move possible S_SET_VGPR_MSB into ttmp15
>   	s_branch	L_FIXUP_ONE_DWORD					// Go to common logic that checks if it is S_SET_VGPR_MSB
>   
> +L_FIXUP_VOP3PX_MIDDLE:
> +	s_sub_co_u32	ttmp0, ttmp0, 8						// Rewind PC 8 bytes to beginning of instruction
> +	s_sub_co_ci_u32	ttmp1, ttmp1, 0
> +	s_branch	L_FIXUP_TWO_DWORD					// 2 DWORD inst (2nd half of a 4 DWORD inst)
> +
>   L_FIXUP_DONE:
>   	s_wait_kmcnt	0							// Ensure load of ttmp2 and ttmp3 is done
>   end


^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2026-01-28 10:54 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-01-22 15:04 [PATCH] drm/amdkfd: gfx12.1 trap handler instruction fixup for VOP3PX Jay Cornwall
2026-01-28 10:54 ` Lancelot SIX

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.