* [PATCH 2/3] drm/amdkfd: Set per-process flags only once for gfx9/10/11/12
2025-03-06 15:55 [PATCH 1/3] drm/amdkfd: Set per-process flags only once cik/vi Amber Lin
@ 2025-03-06 15:55 ` Amber Lin
2025-03-06 15:55 ` [PATCH 3/3] drm/amdkfd: Add support for more per-process flag Amber Lin
1 sibling, 0 replies; 3+ messages in thread
From: Amber Lin @ 2025-03-06 15:55 UTC (permalink / raw)
To: amd-gfx; +Cc: Harish Kasiviswanathan
From: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Define set_cache_memory_policy() for these asics and move all static
changes from update_qpd() which is called each time a queue is created
to set_cache_memory_policy() which is called once during process
initialization
Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
---
.../amd/amdkfd/kfd_device_queue_manager_v10.c | 41 +++++++++++--------
.../amd/amdkfd/kfd_device_queue_manager_v11.c | 41 +++++++++++--------
.../amd/amdkfd/kfd_device_queue_manager_v12.c | 41 +++++++++++--------
.../amd/amdkfd/kfd_device_queue_manager_v9.c | 36 +++++++++++++++-
4 files changed, 107 insertions(+), 52 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c
index 245a90dfc2f6..b5f5f141353b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c
@@ -31,10 +31,17 @@ static int update_qpd_v10(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
static void init_sdma_vm_v10(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd);
+static bool set_cache_memory_policy_v10(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ enum cache_policy default_policy,
+ enum cache_policy alternate_policy,
+ void __user *alternate_aperture_base,
+ uint64_t alternate_aperture_size);
void device_queue_manager_init_v10(
struct device_queue_manager_asic_ops *asic_ops)
{
+ asic_ops->set_cache_memory_policy = set_cache_memory_policy_v10;
asic_ops->update_qpd = update_qpd_v10;
asic_ops->init_sdma_vm = init_sdma_vm_v10;
asic_ops->mqd_manager_init = mqd_manager_init_v10;
@@ -49,27 +56,27 @@ static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd)
private_base;
}
-static int update_qpd_v10(struct device_queue_manager *dqm,
- struct qcm_process_device *qpd)
+static bool set_cache_memory_policy_v10(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ enum cache_policy default_policy,
+ enum cache_policy alternate_policy,
+ void __user *alternate_aperture_base,
+ uint64_t alternate_aperture_size)
{
- struct kfd_process_device *pdd;
-
- pdd = qpd_to_pdd(qpd);
-
- /* check if sh_mem_config register already configured */
- if (qpd->sh_mem_config == 0) {
- qpd->sh_mem_config =
- (SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
- SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
- (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT);
- qpd->sh_mem_ape1_limit = 0;
- qpd->sh_mem_ape1_base = 0;
- }
-
- qpd->sh_mem_bases = compute_sh_mem_bases_64bit(pdd);
+ qpd->sh_mem_config = (SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+ SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
+ (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT);
+ qpd->sh_mem_ape1_limit = 0;
+ qpd->sh_mem_ape1_base = 0;
+ qpd->sh_mem_bases = compute_sh_mem_bases_64bit(qpd_to_pdd(qpd));
pr_debug("sh_mem_bases 0x%X\n", qpd->sh_mem_bases);
+ return true;
+}
+static int update_qpd_v10(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd)
+{
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c
index 2e129da7acb4..f436878d0d62 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c
@@ -30,10 +30,17 @@ static int update_qpd_v11(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
static void init_sdma_vm_v11(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd);
+static bool set_cache_memory_policy_v11(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ enum cache_policy default_policy,
+ enum cache_policy alternate_policy,
+ void __user *alternate_aperture_base,
+ uint64_t alternate_aperture_size);
void device_queue_manager_init_v11(
struct device_queue_manager_asic_ops *asic_ops)
{
+ asic_ops->set_cache_memory_policy = set_cache_memory_policy_v11;
asic_ops->update_qpd = update_qpd_v11;
asic_ops->init_sdma_vm = init_sdma_vm_v11;
asic_ops->mqd_manager_init = mqd_manager_init_v11;
@@ -48,28 +55,28 @@ static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd)
private_base;
}
-static int update_qpd_v11(struct device_queue_manager *dqm,
- struct qcm_process_device *qpd)
+static bool set_cache_memory_policy_v11(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ enum cache_policy default_policy,
+ enum cache_policy alternate_policy,
+ void __user *alternate_aperture_base,
+ uint64_t alternate_aperture_size)
{
- struct kfd_process_device *pdd;
-
- pdd = qpd_to_pdd(qpd);
-
- /* check if sh_mem_config register already configured */
- if (qpd->sh_mem_config == 0) {
- qpd->sh_mem_config =
- (SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
- SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
- (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT);
-
- qpd->sh_mem_ape1_limit = 0;
- qpd->sh_mem_ape1_base = 0;
- }
+ qpd->sh_mem_config = (SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+ SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
+ (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT);
- qpd->sh_mem_bases = compute_sh_mem_bases_64bit(pdd);
+ qpd->sh_mem_ape1_limit = 0;
+ qpd->sh_mem_ape1_base = 0;
+ qpd->sh_mem_bases = compute_sh_mem_bases_64bit(qpd_to_pdd(qpd));
pr_debug("sh_mem_bases 0x%X\n", qpd->sh_mem_bases);
+ return true;
+}
+static int update_qpd_v11(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd)
+{
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v12.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v12.c
index 4f3295b29dfb..62ca1c8fcbaf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v12.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v12.c
@@ -30,10 +30,17 @@ static int update_qpd_v12(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
static void init_sdma_vm_v12(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd);
+static bool set_cache_memory_policy_v12(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ enum cache_policy default_policy,
+ enum cache_policy alternate_policy,
+ void __user *alternate_aperture_base,
+ uint64_t alternate_aperture_size);
void device_queue_manager_init_v12(
struct device_queue_manager_asic_ops *asic_ops)
{
+ asic_ops->set_cache_memory_policy = set_cache_memory_policy_v12;
asic_ops->update_qpd = update_qpd_v12;
asic_ops->init_sdma_vm = init_sdma_vm_v12;
asic_ops->mqd_manager_init = mqd_manager_init_v12;
@@ -48,28 +55,28 @@ static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd)
private_base;
}
-static int update_qpd_v12(struct device_queue_manager *dqm,
- struct qcm_process_device *qpd)
+static bool set_cache_memory_policy_v12(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ enum cache_policy default_policy,
+ enum cache_policy alternate_policy,
+ void __user *alternate_aperture_base,
+ uint64_t alternate_aperture_size)
{
- struct kfd_process_device *pdd;
-
- pdd = qpd_to_pdd(qpd);
-
- /* check if sh_mem_config register already configured */
- if (qpd->sh_mem_config == 0) {
- qpd->sh_mem_config =
- (SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
- SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
- (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT);
-
- qpd->sh_mem_ape1_limit = 0;
- qpd->sh_mem_ape1_base = 0;
- }
+ qpd->sh_mem_config = (SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+ SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
+ (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT);
- qpd->sh_mem_bases = compute_sh_mem_bases_64bit(pdd);
+ qpd->sh_mem_ape1_limit = 0;
+ qpd->sh_mem_ape1_base = 0;
+ qpd->sh_mem_bases = compute_sh_mem_bases_64bit(qpd_to_pdd(qpd));
pr_debug("sh_mem_bases 0x%X\n", qpd->sh_mem_bases);
+ return true;
+}
+static int update_qpd_v12(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd)
+{
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
index 67137e674f1d..c734eb9b505f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
@@ -30,10 +30,17 @@ static int update_qpd_v9(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
static void init_sdma_vm_v9(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd);
+static bool set_cache_memory_policy_v9(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ enum cache_policy default_policy,
+ enum cache_policy alternate_policy,
+ void __user *alternate_aperture_base,
+ uint64_t alternate_aperture_size);
void device_queue_manager_init_v9(
struct device_queue_manager_asic_ops *asic_ops)
{
+ asic_ops->set_cache_memory_policy = set_cache_memory_policy_v9;
asic_ops->update_qpd = update_qpd_v9;
asic_ops->init_sdma_vm = init_sdma_vm_v9;
asic_ops->mqd_manager_init = mqd_manager_init_v9;
@@ -48,10 +55,37 @@ static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd)
private_base;
}
+static bool set_cache_memory_policy_v9(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ enum cache_policy default_policy,
+ enum cache_policy alternate_policy,
+ void __user *alternate_aperture_base,
+ uint64_t alternate_aperture_size)
+{
+ qpd->sh_mem_config = SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+ SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
+
+ if (dqm->dev->kfd->noretry)
+ qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
+
+ if (KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 3) ||
+ KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 5, 0))
+ qpd->sh_mem_config |= (1 << SH_MEM_CONFIG__F8_MODE__SHIFT);
+
+ qpd->sh_mem_ape1_limit = 0;
+ qpd->sh_mem_ape1_base = 0;
+ qpd->sh_mem_bases = compute_sh_mem_bases_64bit(qpd_to_pdd(qpd));
+
+ pr_debug("sh_mem_bases 0x%X sh_mem_config 0x%X\n", qpd->sh_mem_bases,
+ qpd->sh_mem_config);
+ return true;
+}
+
static int update_qpd_v9(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
- struct kfd_process_device *pdd;
+ struct kfd_process_device *pdd = qpd_to_pdd(qpd);
pdd = qpd_to_pdd(qpd);
--
2.34.1
^ permalink raw reply related [flat|nested] 3+ messages in thread* [PATCH 3/3] drm/amdkfd: Add support for more per-process flag
2025-03-06 15:55 [PATCH 1/3] drm/amdkfd: Set per-process flags only once cik/vi Amber Lin
2025-03-06 15:55 ` [PATCH 2/3] drm/amdkfd: Set per-process flags only once for gfx9/10/11/12 Amber Lin
@ 2025-03-06 15:55 ` Amber Lin
1 sibling, 0 replies; 3+ messages in thread
From: Amber Lin @ 2025-03-06 15:55 UTC (permalink / raw)
To: amd-gfx; +Cc: Harish Kasiviswanathan, Felix Kuehling
From: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Add support for more per-process flags starting with option to configure
MFMA precision for gfx 9.5
v2: Change flag name to KFD_PROC_FLAG_MFMA_HIGH_PRECISION
Remove unused else condition
Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
---
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 3 ++-
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 6 ++++--
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | 6 ++++--
.../gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c | 6 ++++--
.../gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c | 6 ++++--
.../gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c | 6 ++++--
.../gpu/drm/amd/amdkfd/kfd_device_queue_manager_v12.c | 6 ++++--
.../gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c | 11 +++++++++--
.../gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c | 6 ++++--
include/uapi/linux/kfd_ioctl.h | 5 ++++-
10 files changed, 43 insertions(+), 18 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 8c2e92378b49..1e9dd00620bf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -606,7 +606,8 @@ static int kfd_ioctl_set_memory_policy(struct file *filep,
default_policy,
alternate_policy,
(void __user *)args->alternate_aperture_base,
- args->alternate_aperture_size))
+ args->alternate_aperture_size,
+ args->misc_process_flag))
err = -EINVAL;
out:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index d23c6a358d34..2afcc1b4856a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -2596,7 +2596,8 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm,
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
- uint64_t alternate_aperture_size)
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties)
{
bool retval = true;
@@ -2611,7 +2612,8 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm,
default_policy,
alternate_policy,
alternate_aperture_base,
- alternate_aperture_size);
+ alternate_aperture_size,
+ misc_process_properties);
if (retval)
goto out;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 7146e227e2c1..122eb745e9c4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -174,7 +174,8 @@ struct device_queue_manager_ops {
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
- uint64_t alternate_aperture_size);
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties);
int (*process_termination)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
@@ -210,7 +211,8 @@ struct device_queue_manager_asic_ops {
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
- uint64_t alternate_aperture_size);
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties);
void (*init_sdma_vm)(struct device_queue_manager *dqm,
struct queue *q,
struct qcm_process_device *qpd);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
index 32bedef912b3..0508ef5a41d7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
@@ -40,7 +40,8 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
- uint64_t alternate_aperture_size);
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties);
static int update_qpd_cik(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
static void init_sdma_vm(struct device_queue_manager *dqm,
@@ -88,7 +89,8 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
- uint64_t alternate_aperture_size)
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties)
{
uint32_t default_mtype;
uint32_t ape1_mtype;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c
index b5f5f141353b..ba6e3d747ccd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c
@@ -36,7 +36,8 @@ static bool set_cache_memory_policy_v10(struct device_queue_manager *dqm,
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
- uint64_t alternate_aperture_size);
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties);
void device_queue_manager_init_v10(
struct device_queue_manager_asic_ops *asic_ops)
@@ -61,7 +62,8 @@ static bool set_cache_memory_policy_v10(struct device_queue_manager *dqm,
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
- uint64_t alternate_aperture_size)
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties)
{
qpd->sh_mem_config = (SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c
index f436878d0d62..8b447d04558f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c
@@ -35,7 +35,8 @@ static bool set_cache_memory_policy_v11(struct device_queue_manager *dqm,
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
- uint64_t alternate_aperture_size);
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties);
void device_queue_manager_init_v11(
struct device_queue_manager_asic_ops *asic_ops)
@@ -60,7 +61,8 @@ static bool set_cache_memory_policy_v11(struct device_queue_manager *dqm,
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
- uint64_t alternate_aperture_size)
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties)
{
qpd->sh_mem_config = (SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v12.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v12.c
index 62ca1c8fcbaf..3550da3a46f9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v12.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v12.c
@@ -35,7 +35,8 @@ static bool set_cache_memory_policy_v12(struct device_queue_manager *dqm,
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
- uint64_t alternate_aperture_size);
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties);
void device_queue_manager_init_v12(
struct device_queue_manager_asic_ops *asic_ops)
@@ -60,7 +61,8 @@ static bool set_cache_memory_policy_v12(struct device_queue_manager *dqm,
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
- uint64_t alternate_aperture_size)
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties)
{
qpd->sh_mem_config = (SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
index c734eb9b505f..4635077aa905 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
@@ -35,7 +35,8 @@ static bool set_cache_memory_policy_v9(struct device_queue_manager *dqm,
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
- uint64_t alternate_aperture_size);
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties);
void device_queue_manager_init_v9(
struct device_queue_manager_asic_ops *asic_ops)
@@ -60,7 +61,8 @@ static bool set_cache_memory_policy_v9(struct device_queue_manager *dqm,
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
- uint64_t alternate_aperture_size)
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties)
{
qpd->sh_mem_config = SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
@@ -73,6 +75,11 @@ static bool set_cache_memory_policy_v9(struct device_queue_manager *dqm,
KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 5, 0))
qpd->sh_mem_config |= (1 << SH_MEM_CONFIG__F8_MODE__SHIFT);
+ if (KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 5, 0)) {
+ if (misc_process_properties & KFD_PROC_FLAG_MFMA_HIGH_PRECISION)
+ qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__PRECISION_MODE__SHIFT;
+ }
+
qpd->sh_mem_ape1_limit = 0;
qpd->sh_mem_ape1_base = 0;
qpd->sh_mem_bases = compute_sh_mem_bases_64bit(qpd_to_pdd(qpd));
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
index 320518f41890..dad83356e976 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
@@ -40,7 +40,8 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
- uint64_t alternate_aperture_size);
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties);
static int update_qpd_vi(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
static void init_sdma_vm(struct device_queue_manager *dqm,
@@ -89,7 +90,8 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
- uint64_t alternate_aperture_size)
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties)
{
uint32_t default_mtype;
uint32_t ape1_mtype;
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index b0160b09987c..e7b4b1551529 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -150,6 +150,9 @@ struct kfd_dbg_device_info_entry {
#define KFD_IOC_CACHE_POLICY_COHERENT 0
#define KFD_IOC_CACHE_POLICY_NONCOHERENT 1
+/* Misc. per process flags */
+#define KFD_PROC_FLAG_MFMA_HIGH_PRECISION (1 << 0)
+
struct kfd_ioctl_set_memory_policy_args {
__u64 alternate_aperture_base; /* to KFD */
__u64 alternate_aperture_size; /* to KFD */
@@ -157,7 +160,7 @@ struct kfd_ioctl_set_memory_policy_args {
__u32 gpu_id; /* to KFD */
__u32 default_policy; /* to KFD */
__u32 alternate_policy; /* to KFD */
- __u32 pad;
+ __u32 misc_process_flag; /* to KFD */
};
/*
--
2.34.1
^ permalink raw reply related [flat|nested] 3+ messages in thread