From: Amber Lin <Amber.Lin@amd.com>
To: <amd-gfx@lists.freedesktop.org>
Cc: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Subject: [PATCH 1/3] drm/amdkfd: Set per-process flags only once cik/vi
Date: Thu, 6 Mar 2025 10:55:53 -0500 [thread overview]
Message-ID: <20250306155555.4687-1-Amber.Lin@amd.com> (raw)
From: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Set per-process static sh_mem config only once during process
initialization. Move all static changes from update_qpd() which is
called each time a queue is created to set_cache_memory_policy() which
is called once during process initialization.
set_cache_memory_policy() is currently defined only for cik and vi
family. So this commit only focuses on these two. A separate commit will
address other asics.
Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
---
.../drm/amd/amdkfd/kfd_device_queue_manager.c | 39 +---------
.../amd/amdkfd/kfd_device_queue_manager_cik.c | 69 ++++++++++++------
.../amd/amdkfd/kfd_device_queue_manager_vi.c | 71 ++++++++++++-------
3 files changed, 94 insertions(+), 85 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index f3f2fd6ee65c..d23c6a358d34 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -2591,14 +2591,6 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
return retval;
}
-/*
- * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
- * stay in user mode.
- */
-#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
-/* APE1 limit is inclusive and 64K aligned. */
-#define APE1_LIMIT_ALIGNMENT 0xFFFF
-
static bool set_cache_memory_policy(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
enum cache_policy default_policy,
@@ -2613,34 +2605,6 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm,
dqm_lock(dqm);
- if (alternate_aperture_size == 0) {
- /* base > limit disables APE1 */
- qpd->sh_mem_ape1_base = 1;
- qpd->sh_mem_ape1_limit = 0;
- } else {
- /*
- * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
- * SH_MEM_APE1_BASE[31:0], 0x0000 }
- * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
- * SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
- * Verify that the base and size parameters can be
- * represented in this format and convert them.
- * Additionally restrict APE1 to user-mode addresses.
- */
-
- uint64_t base = (uintptr_t)alternate_aperture_base;
- uint64_t limit = base + alternate_aperture_size - 1;
-
- if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
- (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
- retval = false;
- goto out;
- }
-
- qpd->sh_mem_ape1_base = base >> 16;
- qpd->sh_mem_ape1_limit = limit >> 16;
- }
-
retval = dqm->asic_ops.set_cache_memory_policy(
dqm,
qpd,
@@ -2649,6 +2613,9 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm,
alternate_aperture_base,
alternate_aperture_size);
+ if (retval)
+ goto out;
+
if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
program_sh_mem_settings(dqm, qpd);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
index d4d95c7f2e5d..32bedef912b3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
@@ -27,6 +27,14 @@
#include "oss/oss_2_4_sh_mask.h"
#include "gca/gfx_7_2_sh_mask.h"
+/*
+ * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
+ * stay in user mode.
+ */
+#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
+/* APE1 limit is inclusive and 64K aligned. */
+#define APE1_LIMIT_ALIGNMENT 0xFFFF
+
static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
enum cache_policy default_policy,
@@ -84,6 +92,36 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
{
uint32_t default_mtype;
uint32_t ape1_mtype;
+ unsigned int temp;
+ bool retval = true;
+
+ if (alternate_aperture_size == 0) {
+ /* base > limit disables APE1 */
+ qpd->sh_mem_ape1_base = 1;
+ qpd->sh_mem_ape1_limit = 0;
+ } else {
+ /*
+ * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
+ * SH_MEM_APE1_BASE[31:0], 0x0000 }
+ * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
+ * SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
+ * Verify that the base and size parameters can be
+ * represented in this format and convert them.
+ * Additionally restrict APE1 to user-mode addresses.
+ */
+
+ uint64_t base = (uintptr_t)alternate_aperture_base;
+ uint64_t limit = base + alternate_aperture_size - 1;
+
+ if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
+ (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
+ retval = false;
+ goto out;
+ }
+
+ qpd->sh_mem_ape1_base = base >> 16;
+ qpd->sh_mem_ape1_limit = limit >> 16;
+ }
default_mtype = (default_policy == cache_policy_coherent) ?
MTYPE_NONCACHED :
@@ -97,37 +135,22 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
| ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
| DEFAULT_MTYPE(default_mtype)
| APE1_MTYPE(ape1_mtype);
-
- return true;
-}
-
-static int update_qpd_cik(struct device_queue_manager *dqm,
- struct qcm_process_device *qpd)
-{
- struct kfd_process_device *pdd;
- unsigned int temp;
-
- pdd = qpd_to_pdd(qpd);
-
- /* check if sh_mem_config register already configured */
- if (qpd->sh_mem_config == 0) {
- qpd->sh_mem_config =
- ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) |
- DEFAULT_MTYPE(MTYPE_NONCACHED) |
- APE1_MTYPE(MTYPE_NONCACHED);
- qpd->sh_mem_ape1_limit = 0;
- qpd->sh_mem_ape1_base = 0;
- }
-
/* On dGPU we're always in GPUVM64 addressing mode with 64-bit
* aperture addresses.
*/
- temp = get_sh_mem_bases_nybble_64(pdd);
+ temp = get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd));
qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
pr_debug("is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases);
+out:
+ return retval;
+}
+
+static int update_qpd_cik(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd)
+{
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
index b291ee0fab94..320518f41890 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
@@ -27,6 +27,14 @@
#include "gca/gfx_8_0_sh_mask.h"
#include "oss/oss_3_0_sh_mask.h"
+/*
+ * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
+ * stay in user mode.
+ */
+#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
+/* APE1 limit is inclusive and 64K aligned. */
+#define APE1_LIMIT_ALIGNMENT 0xFFFF
+
static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
enum cache_policy default_policy,
@@ -85,6 +93,36 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
{
uint32_t default_mtype;
uint32_t ape1_mtype;
+ unsigned int temp;
+ bool retval = true;
+
+ if (alternate_aperture_size == 0) {
+ /* base > limit disables APE1 */
+ qpd->sh_mem_ape1_base = 1;
+ qpd->sh_mem_ape1_limit = 0;
+ } else {
+ /*
+ * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
+ * SH_MEM_APE1_BASE[31:0], 0x0000 }
+ * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
+ * SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
+ * Verify that the base and size parameters can be
+ * represented in this format and convert them.
+ * Additionally restrict APE1 to user-mode addresses.
+ */
+
+ uint64_t base = (uintptr_t)alternate_aperture_base;
+ uint64_t limit = base + alternate_aperture_size - 1;
+
+ if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
+ (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
+ retval = false;
+ goto out;
+ }
+
+ qpd->sh_mem_ape1_base = base >> 16;
+ qpd->sh_mem_ape1_limit = limit >> 16;
+ }
default_mtype = (default_policy == cache_policy_coherent) ?
MTYPE_UC :
@@ -100,40 +138,21 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
default_mtype << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
ape1_mtype << SH_MEM_CONFIG__APE1_MTYPE__SHIFT;
- return true;
-}
-
-static int update_qpd_vi(struct device_queue_manager *dqm,
- struct qcm_process_device *qpd)
-{
- struct kfd_process_device *pdd;
- unsigned int temp;
-
- pdd = qpd_to_pdd(qpd);
-
- /* check if sh_mem_config register already configured */
- if (qpd->sh_mem_config == 0) {
- qpd->sh_mem_config =
- SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
- SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
- MTYPE_UC <<
- SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
- MTYPE_UC <<
- SH_MEM_CONFIG__APE1_MTYPE__SHIFT;
-
- qpd->sh_mem_ape1_limit = 0;
- qpd->sh_mem_ape1_base = 0;
- }
-
/* On dGPU we're always in GPUVM64 addressing mode with 64-bit
* aperture addresses.
*/
- temp = get_sh_mem_bases_nybble_64(pdd);
+ temp = get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd));
qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
pr_debug("sh_mem_bases nybble: 0x%X and register 0x%X\n",
temp, qpd->sh_mem_bases);
+out:
+ return retval;
+}
+static int update_qpd_vi(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd)
+{
return 0;
}
--
2.34.1
next reply other threads:[~2025-03-06 15:56 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-03-06 15:55 Amber Lin [this message]
2025-03-06 15:55 ` [PATCH 2/3] drm/amdkfd: Set per-process flags only once for gfx9/10/11/12 Amber Lin
2025-03-06 15:55 ` [PATCH 3/3] drm/amdkfd: Add support for more per-process flag Amber Lin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250306155555.4687-1-Amber.Lin@amd.com \
--to=amber.lin@amd.com \
--cc=Harish.Kasiviswanathan@amd.com \
--cc=amd-gfx@lists.freedesktop.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.