From: Jay Cornwall <jay.cornwall@amd.com>
To: <amd-gfx@lists.freedesktop.org>
Cc: Jay Cornwall <jay.cornwall@amd.com>
Subject: [PATCH 2/3] drm/amdkfd: Sign-extend TMA address in trap handler
Date: Mon, 31 Jul 2023 15:40:18 -0500
Message-ID: <20230731204019.9519-2-jay.cornwall@amd.com>
In-Reply-To: <20230731204019.9519-1-jay.cornwall@amd.com>
SMEM instructions can reach addresses above 47 bits, but they require
bit 47 to be sign-extended through bits [63:48].
Sign-extend the TMA address in the trap handler before it is used as
an SMEM base address. This allows the TMA to be relocated in a
following patch.
Signed-off-by: Jay Cornwall <jay.cornwall@amd.com>
---
.../gpu/drm/amd/amdkfd/cwsr_trap_handler.h | 58 ++++++++++++-------
.../amd/amdkfd/cwsr_trap_handler_gfx10.asm | 5 ++
.../drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm | 5 ++
3 files changed, 46 insertions(+), 22 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
index 717ad0633dbe..d7cd5fa313ff 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
@@ -274,14 +274,14 @@ static const uint32_t cwsr_trap_gfx8_hex[] = {
static const uint32_t cwsr_trap_gfx9_hex[] = {
- 0xbf820001, 0xbf820254,
+ 0xbf820001, 0xbf820258,
0xb8f8f802, 0x8978ff78,
0x00020006, 0xb8fbf803,
0x866eff78, 0x00002000,
0xbf840009, 0x866eff6d,
0x00ff0000, 0xbf85001e,
0x866eff7b, 0x00000400,
- 0xbf850051, 0xbf8e0010,
+ 0xbf850055, 0xbf8e0010,
0xb8fbf803, 0xbf82fffa,
0x866eff7b, 0x03c00900,
0xbf850015, 0x866eff7b,
@@ -294,13 +294,15 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
0xbf850007, 0xb8eef801,
0x866eff6e, 0x00000800,
0xbf850003, 0x866eff7b,
- 0x00000400, 0xbf850036,
+ 0x00000400, 0xbf85003a,
0xb8faf807, 0x867aff7a,
0x001f8000, 0x8e7a8b7a,
0x8977ff77, 0xfc000000,
0x87777a77, 0xba7ff807,
0x00000000, 0xb8faf812,
0xb8fbf813, 0x8efa887a,
+ 0xbf0d8f7b, 0xbf840002,
+ 0x877bff7b, 0xffff0000,
0xc0031bbd, 0x00000010,
0xbf8cc07f, 0x8e6e976e,
0x8977ff77, 0x00800000,
@@ -676,14 +678,14 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
};
static const uint32_t cwsr_trap_nv1x_hex[] = {
- 0xbf820001, 0xbf8201f1,
+ 0xbf820001, 0xbf8201f5,
0xb0804004, 0xb978f802,
0x8a78ff78, 0x00020006,
0xb97bf803, 0x876eff78,
0x00002000, 0xbf840009,
0x876eff6d, 0x00ff0000,
0xbf85001e, 0x876eff7b,
- 0x00000400, 0xbf850057,
+ 0x00000400, 0xbf85005b,
0xbf8e0010, 0xb97bf803,
0xbf82fffa, 0x876eff7b,
0x00000900, 0xbf850015,
@@ -697,7 +699,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0xb96ef801, 0x876eff6e,
0x00000800, 0xbf850003,
0x876eff7b, 0x00000400,
- 0xbf85003c, 0x8a77ff77,
+ 0xbf850040, 0x8a77ff77,
0xff000000, 0xb97af807,
0x877bff7a, 0x02000000,
0x8f7b867b, 0x88777b77,
@@ -706,6 +708,8 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0x8a7aff7a, 0x023f8000,
0xb9faf807, 0xb97af812,
0xb97bf813, 0x8ffa887a,
+ 0xbf0d8f7b, 0xbf840002,
+ 0x887bff7b, 0xffff0000,
0xf4011bbd, 0xfa000010,
0xbf8cc07f, 0x8f6e976e,
0x8a77ff77, 0x00800000,
@@ -1094,14 +1098,14 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
};
static const uint32_t cwsr_trap_arcturus_hex[] = {
- 0xbf820001, 0xbf8202d0,
+ 0xbf820001, 0xbf8202d4,
0xb8f8f802, 0x8978ff78,
0x00020006, 0xb8fbf803,
0x866eff78, 0x00002000,
0xbf840009, 0x866eff6d,
0x00ff0000, 0xbf85001e,
0x866eff7b, 0x00000400,
- 0xbf850051, 0xbf8e0010,
+ 0xbf850055, 0xbf8e0010,
0xb8fbf803, 0xbf82fffa,
0x866eff7b, 0x03c00900,
0xbf850015, 0x866eff7b,
@@ -1114,13 +1118,15 @@ static const uint32_t cwsr_trap_arcturus_hex[] = {
0xbf850007, 0xb8eef801,
0x866eff6e, 0x00000800,
0xbf850003, 0x866eff7b,
- 0x00000400, 0xbf850036,
+ 0x00000400, 0xbf85003a,
0xb8faf807, 0x867aff7a,
0x001f8000, 0x8e7a8b7a,
0x8977ff77, 0xfc000000,
0x87777a77, 0xba7ff807,
0x00000000, 0xb8faf812,
0xb8fbf813, 0x8efa887a,
+ 0xbf0d8f7b, 0xbf840002,
+ 0x877bff7b, 0xffff0000,
0xc0031bbd, 0x00000010,
0xbf8cc07f, 0x8e6e976e,
0x8977ff77, 0x00800000,
@@ -1572,14 +1578,14 @@ static const uint32_t cwsr_trap_arcturus_hex[] = {
};
static const uint32_t cwsr_trap_aldebaran_hex[] = {
- 0xbf820001, 0xbf8202db,
+ 0xbf820001, 0xbf8202df,
0xb8f8f802, 0x8978ff78,
0x00020006, 0xb8fbf803,
0x866eff78, 0x00002000,
0xbf840009, 0x866eff6d,
0x00ff0000, 0xbf85001e,
0x866eff7b, 0x00000400,
- 0xbf850051, 0xbf8e0010,
+ 0xbf850055, 0xbf8e0010,
0xb8fbf803, 0xbf82fffa,
0x866eff7b, 0x03c00900,
0xbf850015, 0x866eff7b,
@@ -1592,13 +1598,15 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = {
0xbf850007, 0xb8eef801,
0x866eff6e, 0x00000800,
0xbf850003, 0x866eff7b,
- 0x00000400, 0xbf850036,
+ 0x00000400, 0xbf85003a,
0xb8faf807, 0x867aff7a,
0x001f8000, 0x8e7a8b7a,
0x8977ff77, 0xfc000000,
0x87777a77, 0xba7ff807,
0x00000000, 0xb8faf812,
0xb8fbf813, 0x8efa887a,
+ 0xbf0d8f7b, 0xbf840002,
+ 0x877bff7b, 0xffff0000,
0xc0031bbd, 0x00000010,
0xbf8cc07f, 0x8e6e976e,
0x8977ff77, 0x00800000,
@@ -2061,14 +2069,14 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = {
};
static const uint32_t cwsr_trap_gfx10_hex[] = {
- 0xbf820001, 0xbf82021c,
+ 0xbf820001, 0xbf820220,
0xb0804004, 0xb978f802,
0x8a78ff78, 0x00020006,
0xb97bf803, 0x876eff78,
0x00002000, 0xbf840009,
0x876eff6d, 0x00ff0000,
0xbf85001e, 0x876eff7b,
- 0x00000400, 0xbf850041,
+ 0x00000400, 0xbf850045,
0xbf8e0010, 0xb97bf803,
0xbf82fffa, 0x876eff7b,
0x00000900, 0xbf850015,
@@ -2082,8 +2090,10 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0xb96ef801, 0x876eff6e,
0x00000800, 0xbf850003,
0x876eff7b, 0x00000400,
- 0xbf850026, 0xb97af812,
+ 0xbf85002a, 0xb97af812,
0xb97bf813, 0x8ffa887a,
+ 0xbf0d8f7b, 0xbf840002,
+ 0x887bff7b, 0xffff0000,
0xf4011bbd, 0xfa000010,
0xbf8cc07f, 0x8f6e976e,
0x8a77ff77, 0x00800000,
@@ -2496,7 +2506,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
};
static const uint32_t cwsr_trap_gfx11_hex[] = {
- 0xbfa00001, 0xbfa00221,
+ 0xbfa00001, 0xbfa00225,
0xb0804006, 0xb8f8f802,
0x9178ff78, 0x00020006,
0xb8fbf803, 0xbf0d9e6d,
@@ -2506,7 +2516,7 @@ static const uint32_t cwsr_trap_gfx11_hex[] = {
0xbfa10009, 0x8b6eff6d,
0x00ff0000, 0xbfa2001e,
0x8b6eff7b, 0x00000400,
- 0xbfa20041, 0xbf830010,
+ 0xbfa20045, 0xbf830010,
0xb8fbf803, 0xbfa0fffa,
0x8b6eff7b, 0x00000900,
0xbfa20015, 0x8b6eff7b,
@@ -2519,9 +2529,11 @@ static const uint32_t cwsr_trap_gfx11_hex[] = {
0xbfa20007, 0xb8eef801,
0x8b6eff6e, 0x00000800,
0xbfa20003, 0x8b6eff7b,
- 0x00000400, 0xbfa20026,
+ 0x00000400, 0xbfa2002a,
0xbefa4d82, 0xbf89fc07,
- 0x84fa887a, 0xf4005bbd,
+ 0x84fa887a, 0xbf0d8f7b,
+ 0xbfa10002, 0x8c7bff7b,
+ 0xffff0000, 0xf4005bbd,
0xf8000010, 0xbf89fc07,
0x846e976e, 0x9177ff77,
0x00800000, 0x8c776e77,
@@ -2939,14 +2951,14 @@ static const uint32_t cwsr_trap_gfx11_hex[] = {
};
static const uint32_t cwsr_trap_gfx9_4_3_hex[] = {
- 0xbf820001, 0xbf8202d7,
+ 0xbf820001, 0xbf8202db,
0xb8f8f802, 0x8978ff78,
0x00020006, 0xb8fbf803,
0x866eff78, 0x00002000,
0xbf840009, 0x866eff6d,
0x00ff0000, 0xbf85001a,
0x866eff7b, 0x00000400,
- 0xbf85004d, 0xbf8e0010,
+ 0xbf850051, 0xbf8e0010,
0xb8fbf803, 0xbf82fffa,
0x866eff7b, 0x03c00900,
0xbf850011, 0x866eff7b,
@@ -2957,13 +2969,15 @@ static const uint32_t cwsr_trap_gfx9_4_3_hex[] = {
0x866e6f6e, 0xbf850006,
0x866eff6d, 0x00ff0000,
0xbf850003, 0x866eff7b,
- 0x00000400, 0xbf850036,
+ 0x00000400, 0xbf85003a,
0xb8faf807, 0x867aff7a,
0x001f8000, 0x8e7a8b7a,
0x8979ff79, 0xfc000000,
0x87797a79, 0xba7ff807,
0x00000000, 0xb8faf812,
0xb8fbf813, 0x8efa887a,
+ 0xbf0d8f7b, 0xbf840002,
+ 0x877bff7b, 0xffff0000,
0xc0031bbd, 0x00000010,
0xbf8cc07f, 0x8e6e976e,
0x8979ff79, 0x00800000,
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
index 8b92c33c2a7c..fdab64624422 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
@@ -276,6 +276,11 @@ L_FETCH_2ND_TRAP:
#endif
s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8
+ s_bitcmp1_b32 ttmp15, 0xF
+ s_cbranch_scc0 L_NO_SIGN_EXTEND_TMA
+ s_or_b32 ttmp15, ttmp15, 0xFFFF0000
+L_NO_SIGN_EXTEND_TMA:
+
s_load_dword ttmp2, [ttmp14, ttmp15], 0x10 glc:1 // debug trap enabled flag
s_waitcnt lgkmcnt(0)
s_lshl_b32 ttmp2, ttmp2, TTMP11_DEBUG_TRAP_ENABLED_SHIFT
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
index f2087cc2e89d..e506411ad28a 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
@@ -283,6 +283,11 @@ L_FETCH_2ND_TRAP:
s_getreg_b32 ttmp15, hwreg(HW_REG_SQ_SHADER_TMA_HI)
s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8
+ s_bitcmp1_b32 ttmp15, 0xF
+ s_cbranch_scc0 L_NO_SIGN_EXTEND_TMA
+ s_or_b32 ttmp15, ttmp15, 0xFFFF0000
+L_NO_SIGN_EXTEND_TMA:
+
s_load_dword ttmp2, [ttmp14, ttmp15], 0x10 glc:1 // debug trap enabled flag
s_waitcnt lgkmcnt(0)
s_lshl_b32 ttmp2, ttmp2, TTMP_DEBUG_TRAP_ENABLED_SHIFT
--
2.25.1
next prev parent reply other threads:[~2023-07-31 20:41 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-07-31 20:40 [PATCH 1/3] drm/amdkfd: Sync trap handler binaries with source Jay Cornwall
2023-07-31 20:40 ` Jay Cornwall [this message]
2023-07-31 20:40 ` [PATCH 3/3] drm/amdkfd: Relocate TBA/TMA to opposite side of VM hole Jay Cornwall
2023-08-02 20:13 ` [PATCH 1/3] drm/amdkfd: Sync trap handler binaries with source Felix Kuehling
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230731204019.9519-2-jay.cornwall@amd.com \
--to=jay.cornwall@amd.com \
--cc=amd-gfx@lists.freedesktop.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.