AMD-GFX Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/amdkfd: Extend gfx12 trap handler fix to gfx10/11
@ 2024-06-05 23:16 Jay Cornwall
  2024-06-06 13:05 ` Alex Deucher
  2024-06-07 13:08 ` Lancelot SIX
  0 siblings, 2 replies; 3+ messages in thread
From: Jay Cornwall @ 2024-06-05 23:16 UTC (permalink / raw)
  To: amd-gfx; +Cc: Jay Cornwall, Lancelot Six

In commit 6d1878882d2d
("drm/amdkfd: gfx12 context save/restore trap handler fixes") the
following fix was introduced but incorrectly restricted to gfx12.
The same issue and a corresponding fix apply to gfx10 and gfx11.

Do not overwrite TRAPSTS.{SAVECTX,HOST_TRAP} when restoring this
register. Both of these fields can assert while the wavefront is
running the trap handler.

Signed-off-by: Jay Cornwall <jay.cornwall@amd.com>
Cc: Lancelot Six <lancelot.six@amd.com>
---
 .../gpu/drm/amd/amdkfd/cwsr_trap_handler.h    | 16 +++++---
 .../amd/amdkfd/cwsr_trap_handler_gfx10.asm    | 38 ++++++++++++++-----
 2 files changed, 38 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
index 665122d1bbbd..02f7ba8c93cd 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
@@ -1136,7 +1136,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
 	0x705d0000, 0x807c817c,
 	0x8070ff70, 0x00000080,
 	0xbf0a7b7c, 0xbf85fff8,
-	0xbf82013d, 0xbef4037e,
+	0xbf82013f, 0xbef4037e,
 	0x8775ff7f, 0x0000ffff,
 	0x8875ff75, 0x00040000,
 	0xbef60380, 0xbef703ff,
@@ -1275,7 +1275,8 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
 	0x80788478, 0xbf8c0000,
 	0xb9eef815, 0xbefc036f,
 	0xbefe0370, 0xbeff0371,
-	0xb9f9f816, 0xb9fbf803,
+	0xb9f9f816, 0xb9fb4803,
+	0x907b8b7b, 0xb9fba2c3,
 	0xb9f3f801, 0xb96e3a05,
 	0x806e816e, 0xbf0d9972,
 	0xbf850002, 0x8f6e896e,
@@ -2544,7 +2545,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
 	0xe0704000, 0x705d0000,
 	0x807c817c, 0x8070ff70,
 	0x00000080, 0xbf0a7b7c,
-	0xbf85fff8, 0xbf820134,
+	0xbf85fff8, 0xbf820136,
 	0xbef4037e, 0x8775ff7f,
 	0x0000ffff, 0x8875ff75,
 	0x00040000, 0xbef60380,
@@ -2683,7 +2684,8 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
 	0xf0000000, 0x80788478,
 	0xbf8c0000, 0xb9eef815,
 	0xbefc036f, 0xbefe0370,
-	0xbeff0371, 0xb9fbf803,
+	0xbeff0371, 0xb9fb4803,
+	0x907b8b7b, 0xb9fba2c3,
 	0xb9f3f801, 0xb96e3a05,
 	0x806e816e, 0xbf0d9972,
 	0xbf850002, 0x8f6e896e,
@@ -2981,7 +2983,7 @@ static const uint32_t cwsr_trap_gfx11_hex[] = {
 	0x701d0000, 0x807d817d,
 	0x8070ff70, 0x00000080,
 	0xbf0a7b7d, 0xbfa2fff8,
-	0xbfa0013f, 0xbef4007e,
+	0xbfa00143, 0xbef4007e,
 	0x8b75ff7f, 0x0000ffff,
 	0x8c75ff75, 0x00040000,
 	0xbef60080, 0xbef700ff,
@@ -3123,7 +3125,9 @@ static const uint32_t cwsr_trap_gfx11_hex[] = {
 	0x80788478, 0xbf890000,
 	0xb96ef815, 0xbefd006f,
 	0xbefe0070, 0xbeff0071,
-	0xb97bf803, 0xb973f801,
+	0xb97b4803, 0x857b8b7b,
+	0xb97b22c3, 0x857b867b,
+	0xb97b7443, 0xb973f801,
 	0xb8ee3b05, 0x806e816e,
 	0xbf0d9972, 0xbfa20002,
 	0x846e896e, 0xbfa00001,
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
index ac3702b8e3c4..44772eec9ef4 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
@@ -119,9 +119,12 @@ var SQ_WAVE_TRAPSTS_ADDR_WATCH_SHIFT		= 7
 var SQ_WAVE_TRAPSTS_MEM_VIOL_MASK		= 0x100
 var SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT		= 8
 var SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK		= 0x800
+var SQ_WAVE_TRAPSTS_ILLEGAL_INST_SHIFT		= 11
 var SQ_WAVE_TRAPSTS_EXCP_HI_MASK		= 0x7000
 #if ASIC_FAMILY >= CHIP_PLUM_BONITO
+var SQ_WAVE_TRAPSTS_HOST_TRAP_SHIFT		= 16
 var SQ_WAVE_TRAPSTS_WAVE_START_MASK		= 0x20000
+var SQ_WAVE_TRAPSTS_WAVE_START_SHIFT		= 17
 var SQ_WAVE_TRAPSTS_WAVE_END_MASK		= 0x40000
 var SQ_WAVE_TRAPSTS_TRAP_AFTER_INST_MASK	= 0x100000
 #endif
@@ -137,14 +140,23 @@ var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK	= 0x003F8000
 
 var SQ_WAVE_MODE_DEBUG_EN_MASK			= 0x800
 
+var S_TRAPSTS_RESTORE_PART_1_SIZE		= SQ_WAVE_TRAPSTS_SAVECTX_SHIFT
+var S_TRAPSTS_RESTORE_PART_2_SHIFT		= SQ_WAVE_TRAPSTS_ILLEGAL_INST_SHIFT
+
 #if ASIC_FAMILY < CHIP_PLUM_BONITO
 var S_TRAPSTS_NON_MASKABLE_EXCP_MASK		= SQ_WAVE_TRAPSTS_MEM_VIOL_MASK|SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK
+var S_TRAPSTS_RESTORE_PART_2_SIZE		= 32 - S_TRAPSTS_RESTORE_PART_2_SHIFT
+var S_TRAPSTS_RESTORE_PART_3_SHIFT		= 0
+var S_TRAPSTS_RESTORE_PART_3_SIZE		= 0
 #else
 var S_TRAPSTS_NON_MASKABLE_EXCP_MASK		= SQ_WAVE_TRAPSTS_MEM_VIOL_MASK		|\
 						  SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK	|\
 						  SQ_WAVE_TRAPSTS_WAVE_START_MASK	|\
 						  SQ_WAVE_TRAPSTS_WAVE_END_MASK		|\
 						  SQ_WAVE_TRAPSTS_TRAP_AFTER_INST_MASK
+var S_TRAPSTS_RESTORE_PART_2_SIZE		= SQ_WAVE_TRAPSTS_HOST_TRAP_SHIFT - SQ_WAVE_TRAPSTS_ILLEGAL_INST_SHIFT
+var S_TRAPSTS_RESTORE_PART_3_SHIFT		= SQ_WAVE_TRAPSTS_WAVE_START_SHIFT
+var S_TRAPSTS_RESTORE_PART_3_SIZE		= 32 - S_TRAPSTS_RESTORE_PART_3_SHIFT
 #endif
 var S_TRAPSTS_HWREG				= HW_REG_TRAPSTS
 var S_TRAPSTS_SAVE_CONTEXT_MASK			= SQ_WAVE_TRAPSTS_SAVECTX_MASK
@@ -157,6 +169,7 @@ var SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK	= 0x20
 var SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_MASK	= 0x40
 var SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT	= 6
 var SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK	= 0x80
+var SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT	= 7
 var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_MASK	= 0x100
 var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT	= 8
 var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_MASK	= 0x200
@@ -173,6 +186,11 @@ var S_TRAPSTS_NON_MASKABLE_EXCP_MASK		= SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_MASK		|\
 						  SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_MASK	|\
 						  SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_MASK		|\
 						  SQ_WAVE_EXCP_FLAG_PRIV_TRAP_AFTER_INST_MASK
+var S_TRAPSTS_RESTORE_PART_1_SIZE		= SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT
+var S_TRAPSTS_RESTORE_PART_2_SHIFT		= SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT
+var S_TRAPSTS_RESTORE_PART_2_SIZE		= SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT - SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT
+var S_TRAPSTS_RESTORE_PART_3_SHIFT		= SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT
+var S_TRAPSTS_RESTORE_PART_3_SIZE		= 32 - S_TRAPSTS_RESTORE_PART_3_SHIFT
 var BARRIER_STATE_SIGNAL_OFFSET			= 16
 var BARRIER_STATE_VALID_OFFSET			= 0
 #endif
@@ -1386,17 +1404,17 @@ L_SKIP_BARRIER_RESTORE:
 	s_setreg_b32	hwreg(HW_REG_SHADER_XNACK_MASK), s_restore_xnack_mask
 #endif
 
-#if ASIC_FAMILY < CHIP_GFX12
-	s_setreg_b32	hwreg(S_TRAPSTS_HWREG), s_restore_trapsts
-#else
-	// EXCP_FLAG_PRIV.SAVE_CONTEXT and HOST_TRAP may have changed.
+	// {TRAPSTS/EXCP_FLAG_PRIV}.SAVE_CONTEXT and HOST_TRAP may have changed.
 	// Only restore the other fields to avoid clobbering them.
-	s_setreg_b32	hwreg(S_TRAPSTS_HWREG, 0, SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT), s_restore_trapsts
-	s_lshr_b32	s_restore_trapsts, s_restore_trapsts, SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT
-	s_setreg_b32	hwreg(S_TRAPSTS_HWREG, SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT, 1), s_restore_trapsts
-	s_lshr_b32	s_restore_trapsts, s_restore_trapsts, SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT - SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT
-	s_setreg_b32	hwreg(S_TRAPSTS_HWREG, SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT, 32 - SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT), s_restore_trapsts
-#endif
+	s_setreg_b32	hwreg(S_TRAPSTS_HWREG, 0, S_TRAPSTS_RESTORE_PART_1_SIZE), s_restore_trapsts
+	s_lshr_b32	s_restore_trapsts, s_restore_trapsts, S_TRAPSTS_RESTORE_PART_2_SHIFT
+	s_setreg_b32	hwreg(S_TRAPSTS_HWREG, S_TRAPSTS_RESTORE_PART_2_SHIFT, S_TRAPSTS_RESTORE_PART_2_SIZE), s_restore_trapsts
+
+if S_TRAPSTS_RESTORE_PART_3_SIZE > 0
+	s_lshr_b32	s_restore_trapsts, s_restore_trapsts, S_TRAPSTS_RESTORE_PART_3_SHIFT - S_TRAPSTS_RESTORE_PART_2_SHIFT
+	s_setreg_b32	hwreg(S_TRAPSTS_HWREG, S_TRAPSTS_RESTORE_PART_3_SHIFT, S_TRAPSTS_RESTORE_PART_3_SIZE), s_restore_trapsts
+end
+
 	s_setreg_b32	hwreg(HW_REG_MODE), s_restore_mode
 
 	// Restore trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH] drm/amdkfd: Extend gfx12 trap handler fix to gfx10/11
  2024-06-05 23:16 [PATCH] drm/amdkfd: Extend gfx12 trap handler fix to gfx10/11 Jay Cornwall
@ 2024-06-06 13:05 ` Alex Deucher
  2024-06-07 13:08 ` Lancelot SIX
  1 sibling, 0 replies; 3+ messages in thread
From: Alex Deucher @ 2024-06-06 13:05 UTC (permalink / raw)
  To: Jay Cornwall; +Cc: amd-gfx, Lancelot Six

On Wed, Jun 5, 2024 at 7:27 PM Jay Cornwall <jay.cornwall@amd.com> wrote:
>
> In commit 6d1878882d2d
> ("drm/amdkfd: gfx12 context save/restore trap handler fixes") the
> following fix was introduced but incorrectly restricted to gfx12.
> The same issue and a corresponding fix apply to gfx10 and gfx11.
>
> Do not overwrite TRAPSTS.{SAVECTX,HOST_TRAP} when restoring this
> register. Both of these fields can assert while the wavefront is
> running the trap handler.
>
> Signed-off-by: Jay Cornwall <jay.cornwall@amd.com>
> Cc: Lancelot Six <lancelot.six@amd.com>

Acked-by: Alex Deucher <alexander.deucher@amd.com>

> ---
>  .../gpu/drm/amd/amdkfd/cwsr_trap_handler.h    | 16 +++++---
>  .../amd/amdkfd/cwsr_trap_handler_gfx10.asm    | 38 ++++++++++++++-----
>  2 files changed, 38 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
> index 665122d1bbbd..02f7ba8c93cd 100644
> --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
> +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
> @@ -1136,7 +1136,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
>         0x705d0000, 0x807c817c,
>         0x8070ff70, 0x00000080,
>         0xbf0a7b7c, 0xbf85fff8,
> -       0xbf82013d, 0xbef4037e,
> +       0xbf82013f, 0xbef4037e,
>         0x8775ff7f, 0x0000ffff,
>         0x8875ff75, 0x00040000,
>         0xbef60380, 0xbef703ff,
> @@ -1275,7 +1275,8 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
>         0x80788478, 0xbf8c0000,
>         0xb9eef815, 0xbefc036f,
>         0xbefe0370, 0xbeff0371,
> -       0xb9f9f816, 0xb9fbf803,
> +       0xb9f9f816, 0xb9fb4803,
> +       0x907b8b7b, 0xb9fba2c3,
>         0xb9f3f801, 0xb96e3a05,
>         0x806e816e, 0xbf0d9972,
>         0xbf850002, 0x8f6e896e,
> @@ -2544,7 +2545,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
>         0xe0704000, 0x705d0000,
>         0x807c817c, 0x8070ff70,
>         0x00000080, 0xbf0a7b7c,
> -       0xbf85fff8, 0xbf820134,
> +       0xbf85fff8, 0xbf820136,
>         0xbef4037e, 0x8775ff7f,
>         0x0000ffff, 0x8875ff75,
>         0x00040000, 0xbef60380,
> @@ -2683,7 +2684,8 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
>         0xf0000000, 0x80788478,
>         0xbf8c0000, 0xb9eef815,
>         0xbefc036f, 0xbefe0370,
> -       0xbeff0371, 0xb9fbf803,
> +       0xbeff0371, 0xb9fb4803,
> +       0x907b8b7b, 0xb9fba2c3,
>         0xb9f3f801, 0xb96e3a05,
>         0x806e816e, 0xbf0d9972,
>         0xbf850002, 0x8f6e896e,
> @@ -2981,7 +2983,7 @@ static const uint32_t cwsr_trap_gfx11_hex[] = {
>         0x701d0000, 0x807d817d,
>         0x8070ff70, 0x00000080,
>         0xbf0a7b7d, 0xbfa2fff8,
> -       0xbfa0013f, 0xbef4007e,
> +       0xbfa00143, 0xbef4007e,
>         0x8b75ff7f, 0x0000ffff,
>         0x8c75ff75, 0x00040000,
>         0xbef60080, 0xbef700ff,
> @@ -3123,7 +3125,9 @@ static const uint32_t cwsr_trap_gfx11_hex[] = {
>         0x80788478, 0xbf890000,
>         0xb96ef815, 0xbefd006f,
>         0xbefe0070, 0xbeff0071,
> -       0xb97bf803, 0xb973f801,
> +       0xb97b4803, 0x857b8b7b,
> +       0xb97b22c3, 0x857b867b,
> +       0xb97b7443, 0xb973f801,
>         0xb8ee3b05, 0x806e816e,
>         0xbf0d9972, 0xbfa20002,
>         0x846e896e, 0xbfa00001,
> diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
> index ac3702b8e3c4..44772eec9ef4 100644
> --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
> +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
> @@ -119,9 +119,12 @@ var SQ_WAVE_TRAPSTS_ADDR_WATCH_SHIFT               = 7
>  var SQ_WAVE_TRAPSTS_MEM_VIOL_MASK              = 0x100
>  var SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT             = 8
>  var SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK          = 0x800
> +var SQ_WAVE_TRAPSTS_ILLEGAL_INST_SHIFT         = 11
>  var SQ_WAVE_TRAPSTS_EXCP_HI_MASK               = 0x7000
>  #if ASIC_FAMILY >= CHIP_PLUM_BONITO
> +var SQ_WAVE_TRAPSTS_HOST_TRAP_SHIFT            = 16
>  var SQ_WAVE_TRAPSTS_WAVE_START_MASK            = 0x20000
> +var SQ_WAVE_TRAPSTS_WAVE_START_SHIFT           = 17
>  var SQ_WAVE_TRAPSTS_WAVE_END_MASK              = 0x40000
>  var SQ_WAVE_TRAPSTS_TRAP_AFTER_INST_MASK       = 0x100000
>  #endif
> @@ -137,14 +140,23 @@ var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK = 0x003F8000
>
>  var SQ_WAVE_MODE_DEBUG_EN_MASK                 = 0x800
>
> +var S_TRAPSTS_RESTORE_PART_1_SIZE              = SQ_WAVE_TRAPSTS_SAVECTX_SHIFT
> +var S_TRAPSTS_RESTORE_PART_2_SHIFT             = SQ_WAVE_TRAPSTS_ILLEGAL_INST_SHIFT
> +
>  #if ASIC_FAMILY < CHIP_PLUM_BONITO
>  var S_TRAPSTS_NON_MASKABLE_EXCP_MASK           = SQ_WAVE_TRAPSTS_MEM_VIOL_MASK|SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK
> +var S_TRAPSTS_RESTORE_PART_2_SIZE              = 32 - S_TRAPSTS_RESTORE_PART_2_SHIFT
> +var S_TRAPSTS_RESTORE_PART_3_SHIFT             = 0
> +var S_TRAPSTS_RESTORE_PART_3_SIZE              = 0
>  #else
>  var S_TRAPSTS_NON_MASKABLE_EXCP_MASK           = SQ_WAVE_TRAPSTS_MEM_VIOL_MASK         |\
>                                                   SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK     |\
>                                                   SQ_WAVE_TRAPSTS_WAVE_START_MASK       |\
>                                                   SQ_WAVE_TRAPSTS_WAVE_END_MASK         |\
>                                                   SQ_WAVE_TRAPSTS_TRAP_AFTER_INST_MASK
> +var S_TRAPSTS_RESTORE_PART_2_SIZE              = SQ_WAVE_TRAPSTS_HOST_TRAP_SHIFT - SQ_WAVE_TRAPSTS_ILLEGAL_INST_SHIFT
> +var S_TRAPSTS_RESTORE_PART_3_SHIFT             = SQ_WAVE_TRAPSTS_WAVE_START_SHIFT
> +var S_TRAPSTS_RESTORE_PART_3_SIZE              = 32 - S_TRAPSTS_RESTORE_PART_3_SHIFT
>  #endif
>  var S_TRAPSTS_HWREG                            = HW_REG_TRAPSTS
>  var S_TRAPSTS_SAVE_CONTEXT_MASK                        = SQ_WAVE_TRAPSTS_SAVECTX_MASK
> @@ -157,6 +169,7 @@ var SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK        = 0x20
>  var SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_MASK   = 0x40
>  var SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT  = 6
>  var SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK      = 0x80
> +var SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT     = 7
>  var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_MASK     = 0x100
>  var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT    = 8
>  var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_MASK       = 0x200
> @@ -173,6 +186,11 @@ var S_TRAPSTS_NON_MASKABLE_EXCP_MASK               = SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_MASK          |\
>                                                   SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_MASK        |\
>                                                   SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_MASK          |\
>                                                   SQ_WAVE_EXCP_FLAG_PRIV_TRAP_AFTER_INST_MASK
> +var S_TRAPSTS_RESTORE_PART_1_SIZE              = SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT
> +var S_TRAPSTS_RESTORE_PART_2_SHIFT             = SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT
> +var S_TRAPSTS_RESTORE_PART_2_SIZE              = SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT - SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT
> +var S_TRAPSTS_RESTORE_PART_3_SHIFT             = SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT
> +var S_TRAPSTS_RESTORE_PART_3_SIZE              = 32 - S_TRAPSTS_RESTORE_PART_3_SHIFT
>  var BARRIER_STATE_SIGNAL_OFFSET                        = 16
>  var BARRIER_STATE_VALID_OFFSET                 = 0
>  #endif
> @@ -1386,17 +1404,17 @@ L_SKIP_BARRIER_RESTORE:
>         s_setreg_b32    hwreg(HW_REG_SHADER_XNACK_MASK), s_restore_xnack_mask
>  #endif
>
> -#if ASIC_FAMILY < CHIP_GFX12
> -       s_setreg_b32    hwreg(S_TRAPSTS_HWREG), s_restore_trapsts
> -#else
> -       // EXCP_FLAG_PRIV.SAVE_CONTEXT and HOST_TRAP may have changed.
> +       // {TRAPSTS/EXCP_FLAG_PRIV}.SAVE_CONTEXT and HOST_TRAP may have changed.
>         // Only restore the other fields to avoid clobbering them.
> -       s_setreg_b32    hwreg(S_TRAPSTS_HWREG, 0, SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT), s_restore_trapsts
> -       s_lshr_b32      s_restore_trapsts, s_restore_trapsts, SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT
> -       s_setreg_b32    hwreg(S_TRAPSTS_HWREG, SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT, 1), s_restore_trapsts
> -       s_lshr_b32      s_restore_trapsts, s_restore_trapsts, SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT - SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT
> -       s_setreg_b32    hwreg(S_TRAPSTS_HWREG, SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT, 32 - SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT), s_restore_trapsts
> -#endif
> +       s_setreg_b32    hwreg(S_TRAPSTS_HWREG, 0, S_TRAPSTS_RESTORE_PART_1_SIZE), s_restore_trapsts
> +       s_lshr_b32      s_restore_trapsts, s_restore_trapsts, S_TRAPSTS_RESTORE_PART_2_SHIFT
> +       s_setreg_b32    hwreg(S_TRAPSTS_HWREG, S_TRAPSTS_RESTORE_PART_2_SHIFT, S_TRAPSTS_RESTORE_PART_2_SIZE), s_restore_trapsts
> +
> +if S_TRAPSTS_RESTORE_PART_3_SIZE > 0
> +       s_lshr_b32      s_restore_trapsts, s_restore_trapsts, S_TRAPSTS_RESTORE_PART_3_SHIFT - S_TRAPSTS_RESTORE_PART_2_SHIFT
> +       s_setreg_b32    hwreg(S_TRAPSTS_HWREG, S_TRAPSTS_RESTORE_PART_3_SHIFT, S_TRAPSTS_RESTORE_PART_3_SIZE), s_restore_trapsts
> +end
> +
>         s_setreg_b32    hwreg(HW_REG_MODE), s_restore_mode
>
>         // Restore trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
> --
> 2.34.1
>

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] drm/amdkfd: Extend gfx12 trap handler fix to gfx10/11
  2024-06-05 23:16 [PATCH] drm/amdkfd: Extend gfx12 trap handler fix to gfx10/11 Jay Cornwall
  2024-06-06 13:05 ` Alex Deucher
@ 2024-06-07 13:08 ` Lancelot SIX
  1 sibling, 0 replies; 3+ messages in thread
From: Lancelot SIX @ 2024-06-07 13:08 UTC (permalink / raw)
  To: Jay Cornwall, amd-gfx



On 06/06/2024 00:16, Jay Cornwall wrote:
> In commit 6d1878882d2d
> ("drm/amdkfd: gfx12 context save/restore trap handler fixes") the
> following fix was introduced but incorrectly restricted to gfx12.
> The same issue and a corresponding fix apply to gfx10 and gfx11.
> 
> Do not overwrite TRAPSTS.{SAVECTX,HOST_TRAP} when restoring this
> register. Both of these fields can assert while the wavefront is
> running the trap handler.
> 
> Signed-off-by: Jay Cornwall <jay.cornwall@amd.com>
> Cc: Lancelot Six <lancelot.six@amd.com>

Hi Jay,

Thanks, that looks good to me (tested on gfx10.3, 11 and 12).  For 
gfx11+ there might be a risk of overriding perf_snapshot, but that seems 
fine as this is not used, and such sample from cwsr restore would mostly 
be meaningless anyway.

Reviewed-by: Lancelot Six <lancelot.six@amd.com>

Best,
Lancelot.

> ---
>   .../gpu/drm/amd/amdkfd/cwsr_trap_handler.h    | 16 +++++---
>   .../amd/amdkfd/cwsr_trap_handler_gfx10.asm    | 38 ++++++++++++++-----
>   2 files changed, 38 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
> index 665122d1bbbd..02f7ba8c93cd 100644
> --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
> +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
> @@ -1136,7 +1136,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
>   	0x705d0000, 0x807c817c,
>   	0x8070ff70, 0x00000080,
>   	0xbf0a7b7c, 0xbf85fff8,
> -	0xbf82013d, 0xbef4037e,
> +	0xbf82013f, 0xbef4037e,
>   	0x8775ff7f, 0x0000ffff,
>   	0x8875ff75, 0x00040000,
>   	0xbef60380, 0xbef703ff,
> @@ -1275,7 +1275,8 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
>   	0x80788478, 0xbf8c0000,
>   	0xb9eef815, 0xbefc036f,
>   	0xbefe0370, 0xbeff0371,
> -	0xb9f9f816, 0xb9fbf803,
> +	0xb9f9f816, 0xb9fb4803,
> +	0x907b8b7b, 0xb9fba2c3,
>   	0xb9f3f801, 0xb96e3a05,
>   	0x806e816e, 0xbf0d9972,
>   	0xbf850002, 0x8f6e896e,
> @@ -2544,7 +2545,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
>   	0xe0704000, 0x705d0000,
>   	0x807c817c, 0x8070ff70,
>   	0x00000080, 0xbf0a7b7c,
> -	0xbf85fff8, 0xbf820134,
> +	0xbf85fff8, 0xbf820136,
>   	0xbef4037e, 0x8775ff7f,
>   	0x0000ffff, 0x8875ff75,
>   	0x00040000, 0xbef60380,
> @@ -2683,7 +2684,8 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
>   	0xf0000000, 0x80788478,
>   	0xbf8c0000, 0xb9eef815,
>   	0xbefc036f, 0xbefe0370,
> -	0xbeff0371, 0xb9fbf803,
> +	0xbeff0371, 0xb9fb4803,
> +	0x907b8b7b, 0xb9fba2c3,
>   	0xb9f3f801, 0xb96e3a05,
>   	0x806e816e, 0xbf0d9972,
>   	0xbf850002, 0x8f6e896e,
> @@ -2981,7 +2983,7 @@ static const uint32_t cwsr_trap_gfx11_hex[] = {
>   	0x701d0000, 0x807d817d,
>   	0x8070ff70, 0x00000080,
>   	0xbf0a7b7d, 0xbfa2fff8,
> -	0xbfa0013f, 0xbef4007e,
> +	0xbfa00143, 0xbef4007e,
>   	0x8b75ff7f, 0x0000ffff,
>   	0x8c75ff75, 0x00040000,
>   	0xbef60080, 0xbef700ff,
> @@ -3123,7 +3125,9 @@ static const uint32_t cwsr_trap_gfx11_hex[] = {
>   	0x80788478, 0xbf890000,
>   	0xb96ef815, 0xbefd006f,
>   	0xbefe0070, 0xbeff0071,
> -	0xb97bf803, 0xb973f801,
> +	0xb97b4803, 0x857b8b7b,
> +	0xb97b22c3, 0x857b867b,
> +	0xb97b7443, 0xb973f801,
>   	0xb8ee3b05, 0x806e816e,
>   	0xbf0d9972, 0xbfa20002,
>   	0x846e896e, 0xbfa00001,
> diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
> index ac3702b8e3c4..44772eec9ef4 100644
> --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
> +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
> @@ -119,9 +119,12 @@ var SQ_WAVE_TRAPSTS_ADDR_WATCH_SHIFT		= 7
>   var SQ_WAVE_TRAPSTS_MEM_VIOL_MASK		= 0x100
>   var SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT		= 8
>   var SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK		= 0x800
> +var SQ_WAVE_TRAPSTS_ILLEGAL_INST_SHIFT		= 11
>   var SQ_WAVE_TRAPSTS_EXCP_HI_MASK		= 0x7000
>   #if ASIC_FAMILY >= CHIP_PLUM_BONITO
> +var SQ_WAVE_TRAPSTS_HOST_TRAP_SHIFT		= 16
>   var SQ_WAVE_TRAPSTS_WAVE_START_MASK		= 0x20000
> +var SQ_WAVE_TRAPSTS_WAVE_START_SHIFT		= 17
>   var SQ_WAVE_TRAPSTS_WAVE_END_MASK		= 0x40000
>   var SQ_WAVE_TRAPSTS_TRAP_AFTER_INST_MASK	= 0x100000
>   #endif
> @@ -137,14 +140,23 @@ var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK	= 0x003F8000
>   
>   var SQ_WAVE_MODE_DEBUG_EN_MASK			= 0x800
>   
> +var S_TRAPSTS_RESTORE_PART_1_SIZE		= SQ_WAVE_TRAPSTS_SAVECTX_SHIFT
> +var S_TRAPSTS_RESTORE_PART_2_SHIFT		= SQ_WAVE_TRAPSTS_ILLEGAL_INST_SHIFT
> +
>   #if ASIC_FAMILY < CHIP_PLUM_BONITO
>   var S_TRAPSTS_NON_MASKABLE_EXCP_MASK		= SQ_WAVE_TRAPSTS_MEM_VIOL_MASK|SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK
> +var S_TRAPSTS_RESTORE_PART_2_SIZE		= 32 - S_TRAPSTS_RESTORE_PART_2_SHIFT
> +var S_TRAPSTS_RESTORE_PART_3_SHIFT		= 0
> +var S_TRAPSTS_RESTORE_PART_3_SIZE		= 0
>   #else
>   var S_TRAPSTS_NON_MASKABLE_EXCP_MASK		= SQ_WAVE_TRAPSTS_MEM_VIOL_MASK		|\
>   						  SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK	|\
>   						  SQ_WAVE_TRAPSTS_WAVE_START_MASK	|\
>   						  SQ_WAVE_TRAPSTS_WAVE_END_MASK		|\
>   						  SQ_WAVE_TRAPSTS_TRAP_AFTER_INST_MASK
> +var S_TRAPSTS_RESTORE_PART_2_SIZE		= SQ_WAVE_TRAPSTS_HOST_TRAP_SHIFT - SQ_WAVE_TRAPSTS_ILLEGAL_INST_SHIFT
> +var S_TRAPSTS_RESTORE_PART_3_SHIFT		= SQ_WAVE_TRAPSTS_WAVE_START_SHIFT
> +var S_TRAPSTS_RESTORE_PART_3_SIZE		= 32 - S_TRAPSTS_RESTORE_PART_3_SHIFT
>   #endif
>   var S_TRAPSTS_HWREG				= HW_REG_TRAPSTS
>   var S_TRAPSTS_SAVE_CONTEXT_MASK			= SQ_WAVE_TRAPSTS_SAVECTX_MASK
> @@ -157,6 +169,7 @@ var SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK	= 0x20
>   var SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_MASK	= 0x40
>   var SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT	= 6
>   var SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK	= 0x80
> +var SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT	= 7
>   var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_MASK	= 0x100
>   var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT	= 8
>   var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_MASK	= 0x200
> @@ -173,6 +186,11 @@ var S_TRAPSTS_NON_MASKABLE_EXCP_MASK		= SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_MASK		|\
>   						  SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_MASK	|\
>   						  SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_MASK		|\
>   						  SQ_WAVE_EXCP_FLAG_PRIV_TRAP_AFTER_INST_MASK
> +var S_TRAPSTS_RESTORE_PART_1_SIZE		= SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT
> +var S_TRAPSTS_RESTORE_PART_2_SHIFT		= SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT
> +var S_TRAPSTS_RESTORE_PART_2_SIZE		= SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT - SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT
> +var S_TRAPSTS_RESTORE_PART_3_SHIFT		= SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT
> +var S_TRAPSTS_RESTORE_PART_3_SIZE		= 32 - S_TRAPSTS_RESTORE_PART_3_SHIFT
>   var BARRIER_STATE_SIGNAL_OFFSET			= 16
>   var BARRIER_STATE_VALID_OFFSET			= 0
>   #endif
> @@ -1386,17 +1404,17 @@ L_SKIP_BARRIER_RESTORE:
>   	s_setreg_b32	hwreg(HW_REG_SHADER_XNACK_MASK), s_restore_xnack_mask
>   #endif
>   
> -#if ASIC_FAMILY < CHIP_GFX12
> -	s_setreg_b32	hwreg(S_TRAPSTS_HWREG), s_restore_trapsts
> -#else
> -	// EXCP_FLAG_PRIV.SAVE_CONTEXT and HOST_TRAP may have changed.
> +	// {TRAPSTS/EXCP_FLAG_PRIV}.SAVE_CONTEXT and HOST_TRAP may have changed.
>   	// Only restore the other fields to avoid clobbering them.
> -	s_setreg_b32	hwreg(S_TRAPSTS_HWREG, 0, SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT), s_restore_trapsts
> -	s_lshr_b32	s_restore_trapsts, s_restore_trapsts, SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT
> -	s_setreg_b32	hwreg(S_TRAPSTS_HWREG, SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT, 1), s_restore_trapsts
> -	s_lshr_b32	s_restore_trapsts, s_restore_trapsts, SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT - SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT
> -	s_setreg_b32	hwreg(S_TRAPSTS_HWREG, SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT, 32 - SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT), s_restore_trapsts
> -#endif
> +	s_setreg_b32	hwreg(S_TRAPSTS_HWREG, 0, S_TRAPSTS_RESTORE_PART_1_SIZE), s_restore_trapsts
> +	s_lshr_b32	s_restore_trapsts, s_restore_trapsts, S_TRAPSTS_RESTORE_PART_2_SHIFT
> +	s_setreg_b32	hwreg(S_TRAPSTS_HWREG, S_TRAPSTS_RESTORE_PART_2_SHIFT, S_TRAPSTS_RESTORE_PART_2_SIZE), s_restore_trapsts
> +
> +if S_TRAPSTS_RESTORE_PART_3_SIZE > 0
> +	s_lshr_b32	s_restore_trapsts, s_restore_trapsts, S_TRAPSTS_RESTORE_PART_3_SHIFT - S_TRAPSTS_RESTORE_PART_2_SHIFT
> +	s_setreg_b32	hwreg(S_TRAPSTS_HWREG, S_TRAPSTS_RESTORE_PART_3_SHIFT, S_TRAPSTS_RESTORE_PART_3_SIZE), s_restore_trapsts
> +end
> +
>   	s_setreg_b32	hwreg(HW_REG_MODE), s_restore_mode
>   
>   	// Restore trap temporaries 4-11, 13 initialized by SPI debug dispatch logic

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2024-06-07 13:09 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-06-05 23:16 [PATCH] drm/amdkfd: Extend gfx12 trap handler fix to gfx10/11 Jay Cornwall
2024-06-06 13:05 ` Alex Deucher
2024-06-07 13:08 ` Lancelot SIX

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox