* [PATCH 2/2] drm/amd/amdgpu : Use the MES INV_TLBS API for tlb invalidation on gfx12
@ 2025-07-30 14:27 Shaoyun Liu
2025-08-06 17:44 ` Alex Deucher
0 siblings, 1 reply; 15+ messages in thread
From: Shaoyun Liu @ 2025-07-30 14:27 UTC (permalink / raw)
To: amd-gfx; +Cc: Shaoyun Liu
Starting with MES version 0x81, MES provides a new INV_TLBS API that
supports invalidating TLBs by PASID.
Signed-off-by: Shaoyun Liu <shaoyun.liu@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 9 +++++++++
drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c | 15 +++++++++++++++
drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 24 ++++++++++++++++++++++++
3 files changed, 48 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index c0d2c195fe2e..f4c40f1aecd2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -280,6 +280,12 @@ struct mes_reset_queue_input {
bool is_kq;
};
+struct mes_inv_tlbs_pasid_input {
+ uint16_t pasid;
+ uint8_t hub_id;
+ uint8_t flush_type;
+};
+
enum mes_misc_opcode {
MES_MISC_OP_WRITE_REG,
MES_MISC_OP_READ_REG,
@@ -367,6 +373,9 @@ struct amdgpu_mes_funcs {
int (*reset_hw_queue)(struct amdgpu_mes *mes,
struct mes_reset_queue_input *input);
+
+ int (*invalidate_tlbs_pasid)(struct amdgpu_mes *mes,
+ struct mes_inv_tlbs_pasid_input *input);
};
#define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev))
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
index feb92e107af8..323ec04094ed 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
@@ -339,6 +339,21 @@ static void gmc_v12_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
uint16_t queried;
int vmid, i;
+ if (adev->enable_uni_mes && adev->mes.ring[0].sched.ready &&
+ (adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x81) {
+
+ struct mes_inv_tlbs_pasid_input input = {0};
+ input.pasid = pasid;
+ input.flush_type = flush_type;
+ adev->mes.funcs->invalidate_tlbs_pasid(&adev->mes, &input);
+ if (all_hub) {
+ /* hub_id = 1 means for mm_hub*/
+ input.hub_id = 1;
+ adev->mes.funcs->invalidate_tlbs_pasid(&adev->mes, &input);
+ }
+ return;
+ }
+
for (vmid = 1; vmid < 16; vmid++) {
bool valid;
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
index 6b222630f3fa..2e9191fffaf1 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
@@ -108,6 +108,7 @@ static const char *mes_v12_0_opcodes[] = {
"SET_SE_MODE",
"SET_GANG_SUBMIT",
"SET_HW_RSRC_1",
+ "INVALIDATE_TLBS",
};
static const char *mes_v12_0_misc_opcodes[] = {
@@ -879,6 +880,28 @@ static int mes_v12_0_reset_hw_queue(struct amdgpu_mes *mes,
offsetof(union MESAPI__RESET, api_status));
}
+static int mes_v12_0_inv_tlbs_pasid(struct amdgpu_mes *mes,
+ struct mes_inv_tlbs_pasid_input *input)
+{
+ union MESAPI__INV_TLBS mes_inv_tlbs;
+
+ memset(&mes_inv_tlbs, 0, sizeof(mes_inv_tlbs));
+
+ mes_inv_tlbs.header.type = MES_API_TYPE_SCHEDULER;
+ mes_inv_tlbs.header.opcode = MES_SCH_API_INV_TLBS;
+ mes_inv_tlbs.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ mes_inv_tlbs.invalidate_tlbs.inv_sel = 0;
+ mes_inv_tlbs.invalidate_tlbs.flush_type = input->flush_type;
+ mes_inv_tlbs.invalidate_tlbs.inv_sel_id = input->pasid;
+ mes_inv_tlbs.invalidate_tlbs.hub_id = (uint32_t)input->hub_id;
+
+ return mes_v12_0_submit_pkt_and_poll_completion(mes, AMDGPU_MES_KIQ_PIPE,
+ &mes_inv_tlbs, sizeof(mes_inv_tlbs),
+ offsetof(union MESAPI__INV_TLBS, api_status));
+
+}
+
static const struct amdgpu_mes_funcs mes_v12_0_funcs = {
.add_hw_queue = mes_v12_0_add_hw_queue,
.remove_hw_queue = mes_v12_0_remove_hw_queue,
@@ -888,6 +911,7 @@ static const struct amdgpu_mes_funcs mes_v12_0_funcs = {
.resume_gang = mes_v12_0_resume_gang,
.misc_op = mes_v12_0_misc_op,
.reset_hw_queue = mes_v12_0_reset_hw_queue,
+ .invalidate_tlbs_pasid = mes_v12_0_inv_tlbs_pasid,
};
static int mes_v12_0_allocate_ucode_buffer(struct amdgpu_device *adev,
--
2.34.1
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH 2/2] drm/amd/amdgpu : Use the MES INV_TLBS API for tlb invalidation on gfx12
@ 2025-08-06 14:50 Shaoyun Liu
0 siblings, 0 replies; 15+ messages in thread
From: Shaoyun Liu @ 2025-08-06 14:50 UTC (permalink / raw)
To: amd-gfx; +Cc: Shaoyun Liu
Starting with MES version 0x81, MES provides a new INV_TLBS API that
supports invalidating TLBs by PASID.
Signed-off-by: Shaoyun Liu <shaoyun.liu@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 10 ++++++++++
drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c | 15 +++++++++++++++
drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 24 ++++++++++++++++++++++++
3 files changed, 49 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index c0d2c195fe2e..de3c9c335a2f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -280,6 +280,13 @@ struct mes_reset_queue_input {
bool is_kq;
};
+struct mes_inv_tlbs_pasid_input {
+ uint32_t xcc_id;
+ uint16_t pasid;
+ uint8_t hub_id;
+ uint8_t flush_type;
+};
+
enum mes_misc_opcode {
MES_MISC_OP_WRITE_REG,
MES_MISC_OP_READ_REG,
@@ -367,6 +374,9 @@ struct amdgpu_mes_funcs {
int (*reset_hw_queue)(struct amdgpu_mes *mes,
struct mes_reset_queue_input *input);
+
+ int (*invalidate_tlbs_pasid)(struct amdgpu_mes *mes,
+ struct mes_inv_tlbs_pasid_input *input);
};
#define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev))
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
index feb92e107af8..323ec04094ed 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
@@ -339,6 +339,21 @@ static void gmc_v12_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
uint16_t queried;
int vmid, i;
+ if (adev->enable_uni_mes && adev->mes.ring[0].sched.ready &&
+ (adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x81) {
+
+ struct mes_inv_tlbs_pasid_input input = {0};
+ input.pasid = pasid;
+ input.flush_type = flush_type;
+ adev->mes.funcs->invalidate_tlbs_pasid(&adev->mes, &input);
+ if (all_hub) {
+ /* hub_id = 1 means for mm_hub*/
+ input.hub_id = 1;
+ adev->mes.funcs->invalidate_tlbs_pasid(&adev->mes, &input);
+ }
+ return;
+ }
+
for (vmid = 1; vmid < 16; vmid++) {
bool valid;
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
index 6b222630f3fa..2e9191fffaf1 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
@@ -108,6 +108,7 @@ static const char *mes_v12_0_opcodes[] = {
"SET_SE_MODE",
"SET_GANG_SUBMIT",
"SET_HW_RSRC_1",
+ "INVALIDATE_TLBS",
};
static const char *mes_v12_0_misc_opcodes[] = {
@@ -879,6 +880,28 @@ static int mes_v12_0_reset_hw_queue(struct amdgpu_mes *mes,
offsetof(union MESAPI__RESET, api_status));
}
+static int mes_v12_0_inv_tlbs_pasid(struct amdgpu_mes *mes,
+ struct mes_inv_tlbs_pasid_input *input)
+{
+ union MESAPI__INV_TLBS mes_inv_tlbs;
+
+ memset(&mes_inv_tlbs, 0, sizeof(mes_inv_tlbs));
+
+ mes_inv_tlbs.header.type = MES_API_TYPE_SCHEDULER;
+ mes_inv_tlbs.header.opcode = MES_SCH_API_INV_TLBS;
+ mes_inv_tlbs.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ mes_inv_tlbs.invalidate_tlbs.inv_sel = 0;
+ mes_inv_tlbs.invalidate_tlbs.flush_type = input->flush_type;
+ mes_inv_tlbs.invalidate_tlbs.inv_sel_id = input->pasid;
+ mes_inv_tlbs.invalidate_tlbs.hub_id = (uint32_t)input->hub_id;
+
+ return mes_v12_0_submit_pkt_and_poll_completion(mes, AMDGPU_MES_KIQ_PIPE,
+ &mes_inv_tlbs, sizeof(mes_inv_tlbs),
+ offsetof(union MESAPI__INV_TLBS, api_status));
+
+}
+
static const struct amdgpu_mes_funcs mes_v12_0_funcs = {
.add_hw_queue = mes_v12_0_add_hw_queue,
.remove_hw_queue = mes_v12_0_remove_hw_queue,
@@ -888,6 +911,7 @@ static const struct amdgpu_mes_funcs mes_v12_0_funcs = {
.resume_gang = mes_v12_0_resume_gang,
.misc_op = mes_v12_0_misc_op,
.reset_hw_queue = mes_v12_0_reset_hw_queue,
+ .invalidate_tlbs_pasid = mes_v12_0_inv_tlbs_pasid,
};
static int mes_v12_0_allocate_ucode_buffer(struct amdgpu_device *adev,
--
2.34.1
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [PATCH 2/2] drm/amd/amdgpu : Use the MES INV_TLBS API for tlb invalidation on gfx12
2025-07-30 14:27 Shaoyun Liu
@ 2025-08-06 17:44 ` Alex Deucher
2025-08-06 18:02 ` Liu, Shaoyun
0 siblings, 1 reply; 15+ messages in thread
From: Alex Deucher @ 2025-08-06 17:44 UTC (permalink / raw)
To: Shaoyun Liu; +Cc: amd-gfx
On Wed, Jul 30, 2025 at 10:33 AM Shaoyun Liu <shaoyun.liu@amd.com> wrote:
>
> From MES version 0x81, it provide the new API INV_TLBS that support
> invalidate tlbs with PASID.
>
> Signed-off-by: Shaoyun Liu <shaoyun.liu@amd.com>
> ---> drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 9 +++++++++
> drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c | 15 +++++++++++++++
> drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 24 ++++++++++++++++++++++++
> 3 files changed, 48 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> index c0d2c195fe2e..f4c40f1aecd2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> @@ -280,6 +280,12 @@ struct mes_reset_queue_input {
> bool is_kq;
> };
>
> +struct mes_inv_tlbs_pasid_input {
> + uint16_t pasid;
> + uint8_t hub_id;
> + uint8_t flush_type;
> +};
> +
> enum mes_misc_opcode {
> MES_MISC_OP_WRITE_REG,
> MES_MISC_OP_READ_REG,
> @@ -367,6 +373,9 @@ struct amdgpu_mes_funcs {
>
> int (*reset_hw_queue)(struct amdgpu_mes *mes,
> struct mes_reset_queue_input *input);
> +
> + int (*invalidate_tlbs_pasid)(struct amdgpu_mes *mes,
> + struct mes_inv_tlbs_pasid_input *input);
> };
>
> #define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev))
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
> index feb92e107af8..323ec04094ed 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
> @@ -339,6 +339,21 @@ static void gmc_v12_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> uint16_t queried;
> int vmid, i;
>
> + if (adev->enable_uni_mes && adev->mes.ring[0].sched.ready &&
Maybe specify the pipe index explicitly? E.g.,
adev->mes.ring[AMDGPU_MES_SCHED_PIPE].sched.ready
I always forget which index is KIQ and which is SCHED.
> + (adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x81) {
> +
> + struct mes_inv_tlbs_pasid_input input = {0};
> + input.pasid = pasid;
> + input.flush_type = flush_type;
> + adev->mes.funcs->invalidate_tlbs_pasid(&adev->mes, &input);
> + if (all_hub) {
> + /* hub_id = 1 means for mm_hub*/
> + input.hub_id = 1;
> + adev->mes.funcs->invalidate_tlbs_pasid(&adev->mes, &input);
> + }
> + return;
> + }
> +
> for (vmid = 1; vmid < 16; vmid++) {
> bool valid;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
> index 6b222630f3fa..2e9191fffaf1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
> @@ -108,6 +108,7 @@ static const char *mes_v12_0_opcodes[] = {
> "SET_SE_MODE",
> "SET_GANG_SUBMIT",
> "SET_HW_RSRC_1",
> + "INVALIDATE_TLBS",
> };
>
> static const char *mes_v12_0_misc_opcodes[] = {
> @@ -879,6 +880,28 @@ static int mes_v12_0_reset_hw_queue(struct amdgpu_mes *mes,
> offsetof(union MESAPI__RESET, api_status));
> }
>
> +static int mes_v12_0_inv_tlbs_pasid(struct amdgpu_mes *mes,
> + struct mes_inv_tlbs_pasid_input *input)
> +{
> + union MESAPI__INV_TLBS mes_inv_tlbs;
> +
> + memset(&mes_inv_tlbs, 0, sizeof(mes_inv_tlbs));
> +
> + mes_inv_tlbs.header.type = MES_API_TYPE_SCHEDULER;
> + mes_inv_tlbs.header.opcode = MES_SCH_API_INV_TLBS;
> + mes_inv_tlbs.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
> +
> + mes_inv_tlbs.invalidate_tlbs.inv_sel = 0;
> + mes_inv_tlbs.invalidate_tlbs.flush_type = input->flush_type;
> + mes_inv_tlbs.invalidate_tlbs.inv_sel_id = input->pasid;
> + mes_inv_tlbs.invalidate_tlbs.hub_id = (uint32_t)input->hub_id;
> +
> + return mes_v12_0_submit_pkt_and_poll_completion(mes, AMDGPU_MES_KIQ_PIPE,
Should this be AMDGPU_MES_KIQ_PIPE or AMDGPU_MES_SCHED_PIPE? This
seems to differ from the check above in
gmc_v12_0_flush_gpu_tlb_pasid().
Alex
> + &mes_inv_tlbs, sizeof(mes_inv_tlbs),
> + offsetof(union MESAPI__INV_TLBS, api_status));
> +
> +}
> +
> static const struct amdgpu_mes_funcs mes_v12_0_funcs = {
> .add_hw_queue = mes_v12_0_add_hw_queue,
> .remove_hw_queue = mes_v12_0_remove_hw_queue,
> @@ -888,6 +911,7 @@ static const struct amdgpu_mes_funcs mes_v12_0_funcs = {
> .resume_gang = mes_v12_0_resume_gang,
> .misc_op = mes_v12_0_misc_op,
> .reset_hw_queue = mes_v12_0_reset_hw_queue,
> + .invalidate_tlbs_pasid = mes_v12_0_inv_tlbs_pasid,
> };
>
> static int mes_v12_0_allocate_ucode_buffer(struct amdgpu_device *adev,
> --
> 2.34.1
>
^ permalink raw reply [flat|nested] 15+ messages in thread
* RE: [PATCH 2/2] drm/amd/amdgpu : Use the MES INV_TLBS API for tlb invalidation on gfx12
2025-08-06 17:44 ` Alex Deucher
@ 2025-08-06 18:02 ` Liu, Shaoyun
2025-08-06 18:05 ` Alex Deucher
0 siblings, 1 reply; 15+ messages in thread
From: Liu, Shaoyun @ 2025-08-06 18:02 UTC (permalink / raw)
To: Alex Deucher; +Cc: amd-gfx@lists.freedesktop.org
[AMD Official Use Only - AMD Internal Distribution Only]
-----Original Message-----
From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Alex Deucher
Sent: Wednesday, August 6, 2025 1:45 PM
To: Liu, Shaoyun <Shaoyun.Liu@amd.com>
Cc: amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 2/2] drm/amd/amdgpu : Use the MES INV_TLBS API for tlb invalidation on gfx12
On Wed, Jul 30, 2025 at 10:33 AM Shaoyun Liu <shaoyun.liu@amd.com> wrote:
>
> From MES version 0x81, it provide the new API INV_TLBS that support
> invalidate tlbs with PASID.
>
> Signed-off-by: Shaoyun Liu <shaoyun.liu@amd.com>
> ---> drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 9 +++++++++
> drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c | 15 +++++++++++++++
> drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 24 ++++++++++++++++++++++++
> 3 files changed, 48 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> index c0d2c195fe2e..f4c40f1aecd2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> @@ -280,6 +280,12 @@ struct mes_reset_queue_input {
> bool is_kq;
> };
>
> +struct mes_inv_tlbs_pasid_input {
> + uint16_t pasid;
> + uint8_t hub_id;
> + uint8_t flush_type;
> +};
> +
> enum mes_misc_opcode {
> MES_MISC_OP_WRITE_REG,
> MES_MISC_OP_READ_REG,
> @@ -367,6 +373,9 @@ struct amdgpu_mes_funcs {
>
> int (*reset_hw_queue)(struct amdgpu_mes *mes,
> struct mes_reset_queue_input *input);
> +
> + int (*invalidate_tlbs_pasid)(struct amdgpu_mes *mes,
> + struct mes_inv_tlbs_pasid_input *input);
> };
>
> #define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev))
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
> b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
> index feb92e107af8..323ec04094ed 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
> @@ -339,6 +339,21 @@ static void gmc_v12_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> uint16_t queried;
> int vmid, i;
>
> + if (adev->enable_uni_mes && adev->mes.ring[0].sched.ready &&
maybe specify the pipe index explicitly? E.g.,,
adev->mes.ring[AMDGPU_MES_SCHED_PIPE].sched.ready
I always forget which index is KIQ and which is SCHED.
[shaoyunl] ok .
> + (adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >=
> + 0x81) {
> +
> + struct mes_inv_tlbs_pasid_input input = {0};
> + input.pasid = pasid;
> + input.flush_type = flush_type;
> + adev->mes.funcs->invalidate_tlbs_pasid(&adev->mes, &input);
> + if (all_hub) {
> + /* hub_id = 1 means for mm_hub*/
> + input.hub_id = 1;
> + adev->mes.funcs->invalidate_tlbs_pasid(&adev->mes, &input);
> + }
> + return;
> + }
> +
> for (vmid = 1; vmid < 16; vmid++) {
> bool valid;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
> b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
> index 6b222630f3fa..2e9191fffaf1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
> @@ -108,6 +108,7 @@ static const char *mes_v12_0_opcodes[] = {
> "SET_SE_MODE",
> "SET_GANG_SUBMIT",
> "SET_HW_RSRC_1",
> + "INVALIDATE_TLBS",
> };
>
> static const char *mes_v12_0_misc_opcodes[] = { @@ -879,6 +880,28 @@
> static int mes_v12_0_reset_hw_queue(struct amdgpu_mes *mes,
> offsetof(union MESAPI__RESET, api_status)); }
>
> +static int mes_v12_0_inv_tlbs_pasid(struct amdgpu_mes *mes,
> + struct mes_inv_tlbs_pasid_input
> +*input) {
> + union MESAPI__INV_TLBS mes_inv_tlbs;
> +
> + memset(&mes_inv_tlbs, 0, sizeof(mes_inv_tlbs));
> +
> + mes_inv_tlbs.header.type = MES_API_TYPE_SCHEDULER;
> + mes_inv_tlbs.header.opcode = MES_SCH_API_INV_TLBS;
> + mes_inv_tlbs.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
> +
> + mes_inv_tlbs.invalidate_tlbs.inv_sel = 0;
> + mes_inv_tlbs.invalidate_tlbs.flush_type = input->flush_type;
> + mes_inv_tlbs.invalidate_tlbs.inv_sel_id = input->pasid;
> + mes_inv_tlbs.invalidate_tlbs.hub_id = (uint32_t)input->hub_id;
> +
> + return mes_v12_0_submit_pkt_and_poll_completion(mes,
> + AMDGPU_MES_KIQ_PIPE,
Should this be AMDGPU_MES_KIQ_PIPE or AMDGPU_MES_SCHED_PIPE? This seems to differ from the check above in gmc_v12_0_flush_gpu_tlb_pasid().
Alex
[Shaoyun.liu] What I want is to submit to the KIQ pipe when unified MES is ready; the check above only tests whether MES is ready. We would like all non-running-list-related operations to be submitted to KIQ so that they don't create any unnecessary delay or affect process scheduling decisions. This is like the original driver's usage of KIQ. The sched pipe (HIQ) will be used for all user queues.
> + &mes_inv_tlbs, sizeof(mes_inv_tlbs),
> + offsetof(union MESAPI__INV_TLBS, api_status));
> +
> +}
> +
> static const struct amdgpu_mes_funcs mes_v12_0_funcs = {
> .add_hw_queue = mes_v12_0_add_hw_queue,
> .remove_hw_queue = mes_v12_0_remove_hw_queue, @@ -888,6 +911,7
> @@ static const struct amdgpu_mes_funcs mes_v12_0_funcs = {
> .resume_gang = mes_v12_0_resume_gang,
> .misc_op = mes_v12_0_misc_op,
> .reset_hw_queue = mes_v12_0_reset_hw_queue,
> + .invalidate_tlbs_pasid = mes_v12_0_inv_tlbs_pasid,
> };
>
> static int mes_v12_0_allocate_ucode_buffer(struct amdgpu_device
> *adev,
> --
> 2.34.1
>
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 2/2] drm/amd/amdgpu : Use the MES INV_TLBS API for tlb invalidation on gfx12
2025-08-06 18:02 ` Liu, Shaoyun
@ 2025-08-06 18:05 ` Alex Deucher
2025-08-06 18:27 ` Liu, Shaoyun
0 siblings, 1 reply; 15+ messages in thread
From: Alex Deucher @ 2025-08-06 18:05 UTC (permalink / raw)
To: Liu, Shaoyun; +Cc: amd-gfx@lists.freedesktop.org
On Wed, Aug 6, 2025 at 2:02 PM Liu, Shaoyun <Shaoyun.Liu@amd.com> wrote:
>
> [AMD Official Use Only - AMD Internal Distribution Only]
>
> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Alex Deucher
> Sent: Wednesday, August 6, 2025 1:45 PM
> To: Liu, Shaoyun <Shaoyun.Liu@amd.com>
> Cc: amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH 2/2] drm/amd/amdgpu : Use the MES INV_TLBS API for tlb invalidation on gfx12
>
> On Wed, Jul 30, 2025 at 10:33 AM Shaoyun Liu <shaoyun.liu@amd.com> wrote:
> >
> > From MES version 0x81, it provide the new API INV_TLBS that support
> > invalidate tlbs with PASID.
> >
> > Signed-off-by: Shaoyun Liu <shaoyun.liu@amd.com>
> > ---> drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 9 +++++++++
> > drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c | 15 +++++++++++++++
> > drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 24 ++++++++++++++++++++++++
> > 3 files changed, 48 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> > index c0d2c195fe2e..f4c40f1aecd2 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> > @@ -280,6 +280,12 @@ struct mes_reset_queue_input {
> > bool is_kq;
> > };
> >
> > +struct mes_inv_tlbs_pasid_input {
> > + uint16_t pasid;
> > + uint8_t hub_id;
> > + uint8_t flush_type;
> > +};
> > +
> > enum mes_misc_opcode {
> > MES_MISC_OP_WRITE_REG,
> > MES_MISC_OP_READ_REG,
> > @@ -367,6 +373,9 @@ struct amdgpu_mes_funcs {
> >
> > int (*reset_hw_queue)(struct amdgpu_mes *mes,
> > struct mes_reset_queue_input *input);
> > +
> > + int (*invalidate_tlbs_pasid)(struct amdgpu_mes *mes,
> > + struct mes_inv_tlbs_pasid_input *input);
> > };
> >
> > #define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev))
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
> > b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
> > index feb92e107af8..323ec04094ed 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
> > @@ -339,6 +339,21 @@ static void gmc_v12_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> > uint16_t queried;
> > int vmid, i;
> >
> > + if (adev->enable_uni_mes && adev->mes.ring[0].sched.ready &&
>
>
> maybe specify the pipe index explicitly? E.g.,,
> adev->mes.ring[AMDGPU_MES_SCHED_PIPE].sched.ready
> I always forget which index is KIQ and which is SCHED.
>
> [shaoyunl] ok .
>
> > + (adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >=
> > + 0x81) {
> > +
> > + struct mes_inv_tlbs_pasid_input input = {0};
> > + input.pasid = pasid;
> > + input.flush_type = flush_type;
> > + adev->mes.funcs->invalidate_tlbs_pasid(&adev->mes, &input);
> > + if (all_hub) {
> > + /* hub_id = 1 means for mm_hub*/
> > + input.hub_id = 1;
> > + adev->mes.funcs->invalidate_tlbs_pasid(&adev->mes, &input);
> > + }
> > + return;
> > + }
> > +
> > for (vmid = 1; vmid < 16; vmid++) {
> > bool valid;
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
> > b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
> > index 6b222630f3fa..2e9191fffaf1 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
> > @@ -108,6 +108,7 @@ static const char *mes_v12_0_opcodes[] = {
> > "SET_SE_MODE",
> > "SET_GANG_SUBMIT",
> > "SET_HW_RSRC_1",
> > + "INVALIDATE_TLBS",
> > };
> >
> > static const char *mes_v12_0_misc_opcodes[] = { @@ -879,6 +880,28 @@
> > static int mes_v12_0_reset_hw_queue(struct amdgpu_mes *mes,
> > offsetof(union MESAPI__RESET, api_status)); }
> >
> > +static int mes_v12_0_inv_tlbs_pasid(struct amdgpu_mes *mes,
> > + struct mes_inv_tlbs_pasid_input
> > +*input) {
> > + union MESAPI__INV_TLBS mes_inv_tlbs;
> > +
> > + memset(&mes_inv_tlbs, 0, sizeof(mes_inv_tlbs));
> > +
> > + mes_inv_tlbs.header.type = MES_API_TYPE_SCHEDULER;
> > + mes_inv_tlbs.header.opcode = MES_SCH_API_INV_TLBS;
> > + mes_inv_tlbs.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
> > +
> > + mes_inv_tlbs.invalidate_tlbs.inv_sel = 0;
> > + mes_inv_tlbs.invalidate_tlbs.flush_type = input->flush_type;
> > + mes_inv_tlbs.invalidate_tlbs.inv_sel_id = input->pasid;
> > + mes_inv_tlbs.invalidate_tlbs.hub_id = (uint32_t)input->hub_id;
> > +
> > + return mes_v12_0_submit_pkt_and_poll_completion(mes,
> > + AMDGPU_MES_KIQ_PIPE,
>
> Should this be AMDGPU_MES_KIQ_PIPE or AMDGPU_MES_SCHED_PIPE? This seems to differ from the check above in gmc_v12_0_flush_gpu_tlb_pasid().
>
> Alex
>
> [Shaoyun.liu] What I want is submit to the kiq pipe when unified MES is ready , the above check is for MES is ready or not. We would like all none running-list related operation been submitted to KIQ so it won't create any unnecessary delay or impact on process schedule decision . It's like original driver usage for KIQ . The sched pipe (HIQ) will be used for all user queues.
>
In that case, should we check if both pipes are ready in
gmc_v12_0_flush_gpu_tlb_pasid()?
Alex
>
> > + &mes_inv_tlbs, sizeof(mes_inv_tlbs),
> > + offsetof(union MESAPI__INV_TLBS, api_status));
> > +
> > +}
> > +
> > static const struct amdgpu_mes_funcs mes_v12_0_funcs = {
> > .add_hw_queue = mes_v12_0_add_hw_queue,
> > .remove_hw_queue = mes_v12_0_remove_hw_queue, @@ -888,6 +911,7
> > @@ static const struct amdgpu_mes_funcs mes_v12_0_funcs = {
> > .resume_gang = mes_v12_0_resume_gang,
> > .misc_op = mes_v12_0_misc_op,
> > .reset_hw_queue = mes_v12_0_reset_hw_queue,
> > + .invalidate_tlbs_pasid = mes_v12_0_inv_tlbs_pasid,
> > };
> >
> > static int mes_v12_0_allocate_ucode_buffer(struct amdgpu_device
> > *adev,
> > --
> > 2.34.1
> >
^ permalink raw reply [flat|nested] 15+ messages in thread
* RE: [PATCH 2/2] drm/amd/amdgpu : Use the MES INV_TLBS API for tlb invalidation on gfx12
2025-08-06 18:05 ` Alex Deucher
@ 2025-08-06 18:27 ` Liu, Shaoyun
0 siblings, 0 replies; 15+ messages in thread
From: Liu, Shaoyun @ 2025-08-06 18:27 UTC (permalink / raw)
To: Alex Deucher; +Cc: amd-gfx@lists.freedesktop.org
[AMD Official Use Only - AMD Internal Distribution Only]
-----Original Message-----
From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Alex Deucher
Sent: Wednesday, August 6, 2025 2:06 PM
To: Liu, Shaoyun <Shaoyun.Liu@amd.com>
Cc: amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 2/2] drm/amd/amdgpu : Use the MES INV_TLBS API for tlb invalidation on gfx12
On Wed, Aug 6, 2025 at 2:02 PM Liu, Shaoyun <Shaoyun.Liu@amd.com> wrote:
>
> [AMD Official Use Only - AMD Internal Distribution Only]
>
> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of
> Alex Deucher
> Sent: Wednesday, August 6, 2025 1:45 PM
> To: Liu, Shaoyun <Shaoyun.Liu@amd.com>
> Cc: amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH 2/2] drm/amd/amdgpu : Use the MES INV_TLBS API for
> tlb invalidation on gfx12
>
> On Wed, Jul 30, 2025 at 10:33 AM Shaoyun Liu <shaoyun.liu@amd.com> wrote:
> >
> > From MES version 0x81, it provide the new API INV_TLBS that support
> > invalidate tlbs with PASID.
> >
> > Signed-off-by: Shaoyun Liu <shaoyun.liu@amd.com>
> > ---> drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 9 +++++++++
> > drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c | 15 +++++++++++++++
> > drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 24
> > ++++++++++++++++++++++++
> > 3 files changed, 48 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> > index c0d2c195fe2e..f4c40f1aecd2 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> > @@ -280,6 +280,12 @@ struct mes_reset_queue_input {
> > bool is_kq;
> > };
> >
> > +struct mes_inv_tlbs_pasid_input {
> > + uint16_t pasid;
> > + uint8_t hub_id;
> > + uint8_t flush_type;
> > +};
> > +
> > enum mes_misc_opcode {
> > MES_MISC_OP_WRITE_REG,
> > MES_MISC_OP_READ_REG,
> > @@ -367,6 +373,9 @@ struct amdgpu_mes_funcs {
> >
> > int (*reset_hw_queue)(struct amdgpu_mes *mes,
> > struct mes_reset_queue_input *input);
> > +
> > + int (*invalidate_tlbs_pasid)(struct amdgpu_mes *mes,
> > + struct mes_inv_tlbs_pasid_input
> > + *input);
> > };
> >
> > #define amdgpu_mes_kiq_hw_init(adev)
> > (adev)->mes.kiq_hw_init((adev)) diff --git
> > a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
> > b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
> > index feb92e107af8..323ec04094ed 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
> > @@ -339,6 +339,21 @@ static void gmc_v12_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> > uint16_t queried;
> > int vmid, i;
> >
> > + if (adev->enable_uni_mes && adev->mes.ring[0].sched.ready &&
>
>
> maybe specify the pipe index explicitly? E.g.,,
> adev->mes.ring[AMDGPU_MES_SCHED_PIPE].sched.ready
> I always forget which index is KIQ and which is SCHED.
>
> [shaoyunl] ok .
>
> > + (adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >=
> > + 0x81) {
> > +
> > + struct mes_inv_tlbs_pasid_input input = {0};
> > + input.pasid = pasid;
> > + input.flush_type = flush_type;
> > + adev->mes.funcs->invalidate_tlbs_pasid(&adev->mes, &input);
> > + if (all_hub) {
> > + /* hub_id = 1 means for mm_hub*/
> > + input.hub_id = 1;
> > + adev->mes.funcs->invalidate_tlbs_pasid(&adev->mes, &input);
> > + }
> > + return;
> > + }
> > +
> > for (vmid = 1; vmid < 16; vmid++) {
> > bool valid;
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
> > b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
> > index 6b222630f3fa..2e9191fffaf1 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
> > @@ -108,6 +108,7 @@ static const char *mes_v12_0_opcodes[] = {
> > "SET_SE_MODE",
> > "SET_GANG_SUBMIT",
> > "SET_HW_RSRC_1",
> > + "INVALIDATE_TLBS",
> > };
> >
> > static const char *mes_v12_0_misc_opcodes[] = { @@ -879,6 +880,28
> > @@ static int mes_v12_0_reset_hw_queue(struct amdgpu_mes *mes,
> > offsetof(union MESAPI__RESET, api_status));
> > }
> >
> > +static int mes_v12_0_inv_tlbs_pasid(struct amdgpu_mes *mes,
> > + struct mes_inv_tlbs_pasid_input
> > +*input) {
> > + union MESAPI__INV_TLBS mes_inv_tlbs;
> > +
> > + memset(&mes_inv_tlbs, 0, sizeof(mes_inv_tlbs));
> > +
> > + mes_inv_tlbs.header.type = MES_API_TYPE_SCHEDULER;
> > + mes_inv_tlbs.header.opcode = MES_SCH_API_INV_TLBS;
> > + mes_inv_tlbs.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
> > +
> > + mes_inv_tlbs.invalidate_tlbs.inv_sel = 0;
> > + mes_inv_tlbs.invalidate_tlbs.flush_type = input->flush_type;
> > + mes_inv_tlbs.invalidate_tlbs.inv_sel_id = input->pasid;
> > + mes_inv_tlbs.invalidate_tlbs.hub_id =
> > + (uint32_t)input->hub_id;
> > +
> > + return mes_v12_0_submit_pkt_and_poll_completion(mes,
> > + AMDGPU_MES_KIQ_PIPE,
>
> Should this be AMDGPU_MES_KIQ_PIPE or AMDGPU_MES_SCHED_PIPE? This seems to differ from the check above in gmc_v12_0_flush_gpu_tlb_pasid().
>
> Alex
>
> [Shaoyun.liu] What I want is submit to the kiq pipe when unified MES is ready , the above check is for MES is ready or not. We would like all none running-list related operation been submitted to KIQ so it won't create any unnecessary delay or impact on process schedule decision . It's like original driver usage for KIQ . The sched pipe (HIQ) will be used for all user queues.
>
In that case, should we check if both pipes are ready in gmc_v12_0_flush_gpu_tlb_pasid()?
Alex
[shaoyunl] It seems that when MES is enabled, the MES KIQ pipe is assumed to always be ready, and I haven't seen any code that sets or checks the ready flag for mes.ring[KIQ].
>
> > + &mes_inv_tlbs, sizeof(mes_inv_tlbs),
> > + offsetof(union MESAPI__INV_TLBS,
> > + api_status));
> > +
> > +}
> > +
> > static const struct amdgpu_mes_funcs mes_v12_0_funcs = {
> > .add_hw_queue = mes_v12_0_add_hw_queue,
> > .remove_hw_queue = mes_v12_0_remove_hw_queue, @@ -888,6
> > +911,7 @@ static const struct amdgpu_mes_funcs mes_v12_0_funcs = {
> > .resume_gang = mes_v12_0_resume_gang,
> > .misc_op = mes_v12_0_misc_op,
> > .reset_hw_queue = mes_v12_0_reset_hw_queue,
> > + .invalidate_tlbs_pasid = mes_v12_0_inv_tlbs_pasid,
> > };
> >
> > static int mes_v12_0_allocate_ucode_buffer(struct amdgpu_device
> > *adev,
> > --
> > 2.34.1
> >
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH 2/2] drm/amd/amdgpu : Use the MES INV_TLBS API for tlb invalidation on gfx12
@ 2025-08-06 18:32 Shaoyun Liu
0 siblings, 0 replies; 15+ messages in thread
From: Shaoyun Liu @ 2025-08-06 18:32 UTC (permalink / raw)
To: amd-gfx; +Cc: Shaoyun Liu
Starting with MES version 0x81, MES provides a new INV_TLBS API that
supports invalidating TLBs by PASID.
Signed-off-by: Shaoyun Liu <shaoyun.liu@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 10 ++++++++++
drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c | 15 +++++++++++++++
drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 24 ++++++++++++++++++++++++
3 files changed, 49 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index c0d2c195fe2e..489a4a0f0610 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -280,6 +280,13 @@ struct mes_reset_queue_input {
bool is_kq;
};
+struct mes_inv_tlbs_pasid_input {
+ uint32_t xcc_id;
+ uint16_t pasid;
+ uint8_t hub_id;
+ uint8_t flush_type;
+};
+
enum mes_misc_opcode {
MES_MISC_OP_WRITE_REG,
MES_MISC_OP_READ_REG,
@@ -367,6 +374,9 @@ struct amdgpu_mes_funcs {
int (*reset_hw_queue)(struct amdgpu_mes *mes,
struct mes_reset_queue_input *input);
+
+ int (*invalidate_tlbs_pasid)(struct amdgpu_mes *mes,
+ struct mes_inv_tlbs_pasid_input *input);
};
#define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev))
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
index feb92e107af8..c65270f7097b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
@@ -339,6 +339,21 @@ static void gmc_v12_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
uint16_t queried;
int vmid, i;
+ if (adev->enable_uni_mes && adev->mes.ring[AMDGPU_MES_SCHED_PIPE].sched.ready &&
+ (adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x81) {
+
+ struct mes_inv_tlbs_pasid_input input = {0};
+ input.pasid = pasid;
+ input.flush_type = flush_type;
+ adev->mes.funcs->invalidate_tlbs_pasid(&adev->mes, &input);
+ if (all_hub) {
+ /* hub_id = 1 means for mm_hub*/
+ input.hub_id = 1;
+ adev->mes.funcs->invalidate_tlbs_pasid(&adev->mes, &input);
+ }
+ return;
+ }
+
for (vmid = 1; vmid < 16; vmid++) {
bool valid;
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
index 6b222630f3fa..2e9191fffaf1 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
@@ -108,6 +108,7 @@ static const char *mes_v12_0_opcodes[] = {
"SET_SE_MODE",
"SET_GANG_SUBMIT",
"SET_HW_RSRC_1",
+ "INVALIDATE_TLBS",
};
static const char *mes_v12_0_misc_opcodes[] = {
@@ -879,6 +880,28 @@ static int mes_v12_0_reset_hw_queue(struct amdgpu_mes *mes,
offsetof(union MESAPI__RESET, api_status));
}
+static int mes_v12_0_inv_tlbs_pasid(struct amdgpu_mes *mes,
+ struct mes_inv_tlbs_pasid_input *input)
+{
+ union MESAPI__INV_TLBS mes_inv_tlbs;
+
+ memset(&mes_inv_tlbs, 0, sizeof(mes_inv_tlbs));
+
+ mes_inv_tlbs.header.type = MES_API_TYPE_SCHEDULER;
+ mes_inv_tlbs.header.opcode = MES_SCH_API_INV_TLBS;
+ mes_inv_tlbs.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ mes_inv_tlbs.invalidate_tlbs.inv_sel = 0;
+ mes_inv_tlbs.invalidate_tlbs.flush_type = input->flush_type;
+ mes_inv_tlbs.invalidate_tlbs.inv_sel_id = input->pasid;
+ mes_inv_tlbs.invalidate_tlbs.hub_id = (uint32_t)input->hub_id;
+
+ return mes_v12_0_submit_pkt_and_poll_completion(mes, AMDGPU_MES_KIQ_PIPE,
+ &mes_inv_tlbs, sizeof(mes_inv_tlbs),
+ offsetof(union MESAPI__INV_TLBS, api_status));
+
+}
+
static const struct amdgpu_mes_funcs mes_v12_0_funcs = {
.add_hw_queue = mes_v12_0_add_hw_queue,
.remove_hw_queue = mes_v12_0_remove_hw_queue,
@@ -888,6 +911,7 @@ static const struct amdgpu_mes_funcs mes_v12_0_funcs = {
.resume_gang = mes_v12_0_resume_gang,
.misc_op = mes_v12_0_misc_op,
.reset_hw_queue = mes_v12_0_reset_hw_queue,
+ .invalidate_tlbs_pasid = mes_v12_0_inv_tlbs_pasid,
};
static int mes_v12_0_allocate_ucode_buffer(struct amdgpu_device *adev,
--
2.34.1
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH 2/2] drm/amd/amdgpu : Use the MES INV_TLBS API for tlb invalidation on gfx12
@ 2025-08-20 18:47 Shaoyun Liu
2025-08-20 19:57 ` Alex Deucher
0 siblings, 1 reply; 15+ messages in thread
From: Shaoyun Liu @ 2025-08-20 18:47 UTC (permalink / raw)
To: amd-gfx; +Cc: Shaoyun Liu
Starting with MES version 0x81, MES provides a new INV_TLBS API that supports
invalidating TLBs by PASID.
Signed-off-by: Shaoyun Liu <shaoyun.liu@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 16 ++++++++++++++++
drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c | 22 ++++++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 25 +++++++++++++++++++++++++
3 files changed, 63 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index c0d2c195fe2e..1b6e7b4fde36 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -280,6 +280,19 @@ struct mes_reset_queue_input {
bool is_kq;
};
+enum amdgpu_mes_hub_id {
+ AMDGPU_MES_GC_HUB = 0,
+ AMDGPU_MES_MM_HUB0 = 1,
+ AMDGPU_MES_MM_HUB1 = 2,
+};
+
+struct mes_inv_tlbs_pasid_input {
+ uint32_t xcc_id;
+ uint16_t pasid;
+ uint8_t hub_id;
+ uint8_t flush_type;
+};
+
enum mes_misc_opcode {
MES_MISC_OP_WRITE_REG,
MES_MISC_OP_READ_REG,
@@ -367,6 +380,9 @@ struct amdgpu_mes_funcs {
int (*reset_hw_queue)(struct amdgpu_mes *mes,
struct mes_reset_queue_input *input);
+
+ int (*invalidate_tlbs_pasid)(struct amdgpu_mes *mes,
+ struct mes_inv_tlbs_pasid_input *input);
};
#define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev))
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
index feb92e107af8..ef58a849d67d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
@@ -339,6 +339,28 @@ static void gmc_v12_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
uint16_t queried;
int vmid, i;
+ if (adev->enable_uni_mes && adev->mes.ring[AMDGPU_MES_SCHED_PIPE].sched.ready &&
+ (adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x81) {
+
+ struct mes_inv_tlbs_pasid_input input = {0};
+ input.pasid = pasid;
+ input.flush_type = flush_type;
+ if (all_hub) {
+ for_each_set_bit(i, adev->vmhubs_mask,
+ AMDGPU_MAX_VMHUBS) {
+ /*
+ * For gfx12, the index i from vmhubs_mask matchs AMDGPU_MES_HUB_ID,
+ * Need to convert them if they are not match in future asic
+ */
+ input.hub_id = i;
+ adev->mes.funcs->invalidate_tlbs_pasid(&adev->mes, &input);
+ }
+ } else {
+ adev->mes.funcs->invalidate_tlbs_pasid(&adev->mes, &input);
+ }
+ return;
+ }
+
for (vmid = 1; vmid < 16; vmid++) {
bool valid;
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
index 6b222630f3fa..bcaaccf28765 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
@@ -108,6 +108,7 @@ static const char *mes_v12_0_opcodes[] = {
"SET_SE_MODE",
"SET_GANG_SUBMIT",
"SET_HW_RSRC_1",
+ "INVALIDATE_TLBS",
};
static const char *mes_v12_0_misc_opcodes[] = {
@@ -879,6 +880,29 @@ static int mes_v12_0_reset_hw_queue(struct amdgpu_mes *mes,
offsetof(union MESAPI__RESET, api_status));
}
+static int mes_v12_0_inv_tlbs_pasid(struct amdgpu_mes *mes,
+ struct mes_inv_tlbs_pasid_input *input)
+{
+ union MESAPI__INV_TLBS mes_inv_tlbs;
+
+ memset(&mes_inv_tlbs, 0, sizeof(mes_inv_tlbs));
+
+ mes_inv_tlbs.header.type = MES_API_TYPE_SCHEDULER;
+ mes_inv_tlbs.header.opcode = MES_SCH_API_INV_TLBS;
+ mes_inv_tlbs.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ mes_inv_tlbs.invalidate_tlbs.inv_sel = 0;
+ mes_inv_tlbs.invalidate_tlbs.flush_type = input->flush_type;
+ mes_inv_tlbs.invalidate_tlbs.inv_sel_id = input->pasid;
+ /*The AMDGPU_MES_HUB_ID from input matchs mes expection on gfx12*/
+ mes_inv_tlbs.invalidate_tlbs.hub_id = (uint32_t)input->hub_id;
+
+ return mes_v12_0_submit_pkt_and_poll_completion(mes, AMDGPU_MES_KIQ_PIPE,
+ &mes_inv_tlbs, sizeof(mes_inv_tlbs),
+ offsetof(union MESAPI__INV_TLBS, api_status));
+
+}
+
static const struct amdgpu_mes_funcs mes_v12_0_funcs = {
.add_hw_queue = mes_v12_0_add_hw_queue,
.remove_hw_queue = mes_v12_0_remove_hw_queue,
@@ -888,6 +912,7 @@ static const struct amdgpu_mes_funcs mes_v12_0_funcs = {
.resume_gang = mes_v12_0_resume_gang,
.misc_op = mes_v12_0_misc_op,
.reset_hw_queue = mes_v12_0_reset_hw_queue,
+ .invalidate_tlbs_pasid = mes_v12_0_inv_tlbs_pasid,
};
static int mes_v12_0_allocate_ucode_buffer(struct amdgpu_device *adev,
--
2.34.1
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [PATCH 2/2] drm/amd/amdgpu : Use the MES INV_TLBS API for tlb invalidation on gfx12
2025-08-20 18:47 Shaoyun Liu
@ 2025-08-20 19:57 ` Alex Deucher
2025-08-21 2:27 ` Liu, Shaoyun
0 siblings, 1 reply; 15+ messages in thread
From: Alex Deucher @ 2025-08-20 19:57 UTC (permalink / raw)
To: Shaoyun Liu; +Cc: amd-gfx
On Wed, Aug 20, 2025 at 2:47 PM Shaoyun Liu <shaoyun.liu@amd.com> wrote:
>
> From MES version 0x81, it provide the new API INV_TLBS that support
> invalidate tlbs with PASID.
>
> Signed-off-by: Shaoyun Liu <shaoyun.liu@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 16 ++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c | 22 ++++++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 25 +++++++++++++++++++++++++
> 3 files changed, 63 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> index c0d2c195fe2e..1b6e7b4fde36 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> @@ -280,6 +280,19 @@ struct mes_reset_queue_input {
> bool is_kq;
> };
>
> +enum amdgpu_mes_hub_id {
> + AMDGPU_MES_GC_HUB = 0,
> + AMDGPU_MES_MM_HUB0 = 1,
> + AMDGPU_MES_MM_HUB1 = 2,
> +};
> +
> +struct mes_inv_tlbs_pasid_input {
> + uint32_t xcc_id;
> + uint16_t pasid;
> + uint8_t hub_id;
> + uint8_t flush_type;
> +};
> +
> enum mes_misc_opcode {
> MES_MISC_OP_WRITE_REG,
> MES_MISC_OP_READ_REG,
> @@ -367,6 +380,9 @@ struct amdgpu_mes_funcs {
>
> int (*reset_hw_queue)(struct amdgpu_mes *mes,
> struct mes_reset_queue_input *input);
> +
> + int (*invalidate_tlbs_pasid)(struct amdgpu_mes *mes,
> + struct mes_inv_tlbs_pasid_input *input);
> };
>
> #define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev))
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
> index feb92e107af8..ef58a849d67d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
> @@ -339,6 +339,28 @@ static void gmc_v12_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> uint16_t queried;
> int vmid, i;
>
> + if (adev->enable_uni_mes && adev->mes.ring[AMDGPU_MES_SCHED_PIPE].sched.ready &&
> + (adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x81) {
> +
> + struct mes_inv_tlbs_pasid_input input = {0};
> + input.pasid = pasid;
> + input.flush_type = flush_type;
> + if (all_hub) {
> + for_each_set_bit(i, adev->vmhubs_mask,
> + AMDGPU_MAX_VMHUBS) {
> + /*
> + * For gfx12, the index i from vmhubs_mask matchs AMDGPU_MES_HUB_ID,
> + * Need to convert them if they are not match in future asic
> + */
Are you sure about this? From above:
> + AMDGPU_MES_GC_HUB = 0,
> + AMDGPU_MES_MM_HUB0 = 1,
> + AMDGPU_MES_MM_HUB1 = 2,
And then in amdgpu_vm.h:
/*
* max number of VMHUB
* layout: max 8 GFXHUB + 4 MMHUB0 + 1 MMHUB1
*/
#define AMDGPU_MAX_VMHUBS 13
#define AMDGPU_GFXHUB_START 0
#define AMDGPU_MMHUB0_START 8
#define AMDGPU_MMHUB1_START 12
#define AMDGPU_GFXHUB(x) (AMDGPU_GFXHUB_START + (x))
#define AMDGPU_MMHUB0(x) (AMDGPU_MMHUB0_START + (x))
#define AMDGPU_MMHUB1(x) (AMDGPU_MMHUB1_START + (x))
Alex
> + input.hub_id = i;
> + adev->mes.funcs->invalidate_tlbs_pasid(&adev->mes, &input);
> + }
> + } else {
> + adev->mes.funcs->invalidate_tlbs_pasid(&adev->mes, &input);
> + }
> + return;
> + }
> +
> for (vmid = 1; vmid < 16; vmid++) {
> bool valid;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
> index 6b222630f3fa..bcaaccf28765 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
> @@ -108,6 +108,7 @@ static const char *mes_v12_0_opcodes[] = {
> "SET_SE_MODE",
> "SET_GANG_SUBMIT",
> "SET_HW_RSRC_1",
> + "INVALIDATE_TLBS",
> };
>
> static const char *mes_v12_0_misc_opcodes[] = {
> @@ -879,6 +880,29 @@ static int mes_v12_0_reset_hw_queue(struct amdgpu_mes *mes,
> offsetof(union MESAPI__RESET, api_status));
> }
>
> +static int mes_v12_0_inv_tlbs_pasid(struct amdgpu_mes *mes,
> + struct mes_inv_tlbs_pasid_input *input)
> +{
> + union MESAPI__INV_TLBS mes_inv_tlbs;
> +
> + memset(&mes_inv_tlbs, 0, sizeof(mes_inv_tlbs));
> +
> + mes_inv_tlbs.header.type = MES_API_TYPE_SCHEDULER;
> + mes_inv_tlbs.header.opcode = MES_SCH_API_INV_TLBS;
> + mes_inv_tlbs.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
> +
> + mes_inv_tlbs.invalidate_tlbs.inv_sel = 0;
> + mes_inv_tlbs.invalidate_tlbs.flush_type = input->flush_type;
> + mes_inv_tlbs.invalidate_tlbs.inv_sel_id = input->pasid;
> + /*The AMDGPU_MES_HUB_ID from input matchs mes expection on gfx12*/
> + mes_inv_tlbs.invalidate_tlbs.hub_id = (uint32_t)input->hub_id;
> +
> + return mes_v12_0_submit_pkt_and_poll_completion(mes, AMDGPU_MES_KIQ_PIPE,
> + &mes_inv_tlbs, sizeof(mes_inv_tlbs),
> + offsetof(union MESAPI__INV_TLBS, api_status));
> +
> +}
> +
> static const struct amdgpu_mes_funcs mes_v12_0_funcs = {
> .add_hw_queue = mes_v12_0_add_hw_queue,
> .remove_hw_queue = mes_v12_0_remove_hw_queue,
> @@ -888,6 +912,7 @@ static const struct amdgpu_mes_funcs mes_v12_0_funcs = {
> .resume_gang = mes_v12_0_resume_gang,
> .misc_op = mes_v12_0_misc_op,
> .reset_hw_queue = mes_v12_0_reset_hw_queue,
> + .invalidate_tlbs_pasid = mes_v12_0_inv_tlbs_pasid,
> };
>
> static int mes_v12_0_allocate_ucode_buffer(struct amdgpu_device *adev,
> --
> 2.34.1
>
^ permalink raw reply [flat|nested] 15+ messages in thread
* RE: [PATCH 2/2] drm/amd/amdgpu : Use the MES INV_TLBS API for tlb invalidation on gfx12
2025-08-20 19:57 ` Alex Deucher
@ 2025-08-21 2:27 ` Liu, Shaoyun
2025-08-21 12:48 ` Alex Deucher
0 siblings, 1 reply; 15+ messages in thread
From: Liu, Shaoyun @ 2025-08-21 2:27 UTC (permalink / raw)
To: Alex Deucher; +Cc: amd-gfx@lists.freedesktop.org
[AMD Official Use Only - AMD Internal Distribution Only]
You are right, the defines are different. Originally I thought we only had one gchub for the device and two mmhubs, numbered 1 and 2. It seems each xcc has its own gchub, which makes sense to me, but for mmhub0(0-3) and mmhub1(0-1)
I'm confused about how they are mapped on the hw side. MES only deals with one mmhub on normal gfx12, and two mmhubs on gfx12_1.
Regards
Shaoyun.liu
-----Original Message-----
From: Alex Deucher <alexdeucher@gmail.com>
Sent: Wednesday, August 20, 2025 3:57 PM
To: Liu, Shaoyun <Shaoyun.Liu@amd.com>
Cc: amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 2/2] drm/amd/amdgpu : Use the MES INV_TLBS API for tlb invalidation on gfx12
On Wed, Aug 20, 2025 at 2:47 PM Shaoyun Liu <shaoyun.liu@amd.com> wrote:
>
> From MES version 0x81, it provide the new API INV_TLBS that support
> invalidate tlbs with PASID.
>
> Signed-off-by: Shaoyun Liu <shaoyun.liu@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 16 ++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c | 22 ++++++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 25 +++++++++++++++++++++++++
> 3 files changed, 63 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> index c0d2c195fe2e..1b6e7b4fde36 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> @@ -280,6 +280,19 @@ struct mes_reset_queue_input {
> bool is_kq;
> };
>
> +enum amdgpu_mes_hub_id {
> + AMDGPU_MES_GC_HUB = 0,
> + AMDGPU_MES_MM_HUB0 = 1,
> + AMDGPU_MES_MM_HUB1 = 2,
> +};
> +
> +struct mes_inv_tlbs_pasid_input {
> + uint32_t xcc_id;
> + uint16_t pasid;
> + uint8_t hub_id;
> + uint8_t flush_type;
> +};
> +
> enum mes_misc_opcode {
> MES_MISC_OP_WRITE_REG,
> MES_MISC_OP_READ_REG,
> @@ -367,6 +380,9 @@ struct amdgpu_mes_funcs {
>
> int (*reset_hw_queue)(struct amdgpu_mes *mes,
> struct mes_reset_queue_input *input);
> +
> + int (*invalidate_tlbs_pasid)(struct amdgpu_mes *mes,
> + struct mes_inv_tlbs_pasid_input *input);
> };
>
> #define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev))
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
> b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
> index feb92e107af8..ef58a849d67d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
> @@ -339,6 +339,28 @@ static void gmc_v12_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> uint16_t queried;
> int vmid, i;
>
> + if (adev->enable_uni_mes && adev->mes.ring[AMDGPU_MES_SCHED_PIPE].sched.ready &&
> + (adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >=
> + 0x81) {
> +
> + struct mes_inv_tlbs_pasid_input input = {0};
> + input.pasid = pasid;
> + input.flush_type = flush_type;
> + if (all_hub) {
> + for_each_set_bit(i, adev->vmhubs_mask,
> + AMDGPU_MAX_VMHUBS) {
> + /*
> + * For gfx12, the index i from vmhubs_mask matchs AMDGPU_MES_HUB_ID,
> + * Need to convert them if they are not match in future asic
> + */
Are you sure about this? From above:
> + AMDGPU_MES_GC_HUB = 0,
> + AMDGPU_MES_MM_HUB0 = 1,
> + AMDGPU_MES_MM_HUB1 = 2,
And then in amdgpu_vm.h:
/*
* max number of VMHUB
* layout: max 8 GFXHUB + 4 MMHUB0 + 1 MMHUB1 */
#define AMDGPU_MAX_VMHUBS 13
#define AMDGPU_GFXHUB_START 0
#define AMDGPU_MMHUB0_START 8
#define AMDGPU_MMHUB1_START 12
#define AMDGPU_GFXHUB(x) (AMDGPU_GFXHUB_START + (x))
#define AMDGPU_MMHUB0(x) (AMDGPU_MMHUB0_START + (x))
#define AMDGPU_MMHUB1(x) (AMDGPU_MMHUB1_START + (x))
Alex
> + input.hub_id = i;
> + adev->mes.funcs->invalidate_tlbs_pasid(&adev->mes, &input);
> + }
> + } else {
> + adev->mes.funcs->invalidate_tlbs_pasid(&adev->mes, &input);
> + }
> + return;
> + }
> +
> for (vmid = 1; vmid < 16; vmid++) {
> bool valid;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
> b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
> index 6b222630f3fa..bcaaccf28765 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
> @@ -108,6 +108,7 @@ static const char *mes_v12_0_opcodes[] = {
> "SET_SE_MODE",
> "SET_GANG_SUBMIT",
> "SET_HW_RSRC_1",
> + "INVALIDATE_TLBS",
> };
>
> static const char *mes_v12_0_misc_opcodes[] = { @@ -879,6 +880,29 @@
> static int mes_v12_0_reset_hw_queue(struct amdgpu_mes *mes,
> offsetof(union MESAPI__RESET, api_status)); }
>
> +static int mes_v12_0_inv_tlbs_pasid(struct amdgpu_mes *mes,
> + struct mes_inv_tlbs_pasid_input
> +*input) {
> + union MESAPI__INV_TLBS mes_inv_tlbs;
> +
> + memset(&mes_inv_tlbs, 0, sizeof(mes_inv_tlbs));
> +
> + mes_inv_tlbs.header.type = MES_API_TYPE_SCHEDULER;
> + mes_inv_tlbs.header.opcode = MES_SCH_API_INV_TLBS;
> + mes_inv_tlbs.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
> +
> + mes_inv_tlbs.invalidate_tlbs.inv_sel = 0;
> + mes_inv_tlbs.invalidate_tlbs.flush_type = input->flush_type;
> + mes_inv_tlbs.invalidate_tlbs.inv_sel_id = input->pasid;
> + /*The AMDGPU_MES_HUB_ID from input matchs mes expection on gfx12*/
> + mes_inv_tlbs.invalidate_tlbs.hub_id = (uint32_t)input->hub_id;
> +
> + return mes_v12_0_submit_pkt_and_poll_completion(mes, AMDGPU_MES_KIQ_PIPE,
> + &mes_inv_tlbs, sizeof(mes_inv_tlbs),
> + offsetof(union MESAPI__INV_TLBS, api_status));
> +
> +}
> +
> static const struct amdgpu_mes_funcs mes_v12_0_funcs = {
> .add_hw_queue = mes_v12_0_add_hw_queue,
> .remove_hw_queue = mes_v12_0_remove_hw_queue, @@ -888,6 +912,7
> @@ static const struct amdgpu_mes_funcs mes_v12_0_funcs = {
> .resume_gang = mes_v12_0_resume_gang,
> .misc_op = mes_v12_0_misc_op,
> .reset_hw_queue = mes_v12_0_reset_hw_queue,
> + .invalidate_tlbs_pasid = mes_v12_0_inv_tlbs_pasid,
> };
>
> static int mes_v12_0_allocate_ucode_buffer(struct amdgpu_device
> *adev,
> --
> 2.34.1
>
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 2/2] drm/amd/amdgpu : Use the MES INV_TLBS API for tlb invalidation on gfx12
2025-08-21 2:27 ` Liu, Shaoyun
@ 2025-08-21 12:48 ` Alex Deucher
0 siblings, 0 replies; 15+ messages in thread
From: Alex Deucher @ 2025-08-21 12:48 UTC (permalink / raw)
To: Liu, Shaoyun; +Cc: amd-gfx@lists.freedesktop.org
On Wed, Aug 20, 2025 at 10:27 PM Liu, Shaoyun <Shaoyun.Liu@amd.com> wrote:
>
> [AMD Official Use Only - AMD Internal Distribution Only]
>
> You are right, the defines are different. Originally I thought we only had one gchub for the device and two mmhubs, numbered 1 and 2. It seems each xcc has its own gchub, which makes sense to me, but for mmhub0(0-3) and mmhub1(0-1)
> I'm confused about how they are mapped on the hw side. MES only deals with one mmhub on normal gfx12, and two mmhubs on gfx12_1.
Some of the MI parts have multiple GC and MM hubs, so we abstract them
generically in the driver and then map them however we need to when
accessing the actual hw-specific bits.
Alex
>
> Regards
> Shaoyun.liu
>
>
> -----Original Message-----
> From: Alex Deucher <alexdeucher@gmail.com>
> Sent: Wednesday, August 20, 2025 3:57 PM
> To: Liu, Shaoyun <Shaoyun.Liu@amd.com>
> Cc: amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH 2/2] drm/amd/amdgpu : Use the MES INV_TLBS API for tlb invalidation on gfx12
>
> On Wed, Aug 20, 2025 at 2:47 PM Shaoyun Liu <shaoyun.liu@amd.com> wrote:
> >
> > From MES version 0x81, it provide the new API INV_TLBS that support
> > invalidate tlbs with PASID.
> >
> > Signed-off-by: Shaoyun Liu <shaoyun.liu@amd.com>
> > ---
> > drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 16 ++++++++++++++++
> > drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c | 22 ++++++++++++++++++++++
> > drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 25 +++++++++++++++++++++++++
> > 3 files changed, 63 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> > index c0d2c195fe2e..1b6e7b4fde36 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> > @@ -280,6 +280,19 @@ struct mes_reset_queue_input {
> > bool is_kq;
> > };
> >
> > +enum amdgpu_mes_hub_id {
> > + AMDGPU_MES_GC_HUB = 0,
> > + AMDGPU_MES_MM_HUB0 = 1,
> > + AMDGPU_MES_MM_HUB1 = 2,
> > +};
> > +
> > +struct mes_inv_tlbs_pasid_input {
> > + uint32_t xcc_id;
> > + uint16_t pasid;
> > + uint8_t hub_id;
> > + uint8_t flush_type;
> > +};
> > +
> > enum mes_misc_opcode {
> > MES_MISC_OP_WRITE_REG,
> > MES_MISC_OP_READ_REG,
> > @@ -367,6 +380,9 @@ struct amdgpu_mes_funcs {
> >
> > int (*reset_hw_queue)(struct amdgpu_mes *mes,
> > struct mes_reset_queue_input *input);
> > +
> > + int (*invalidate_tlbs_pasid)(struct amdgpu_mes *mes,
> > + struct mes_inv_tlbs_pasid_input *input);
> > };
> >
> > #define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev))
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
> > b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
> > index feb92e107af8..ef58a849d67d 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
> > @@ -339,6 +339,28 @@ static void gmc_v12_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> > uint16_t queried;
> > int vmid, i;
> >
> > + if (adev->enable_uni_mes && adev->mes.ring[AMDGPU_MES_SCHED_PIPE].sched.ready &&
> > + (adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >=
> > + 0x81) {
> > +
> > + struct mes_inv_tlbs_pasid_input input = {0};
> > + input.pasid = pasid;
> > + input.flush_type = flush_type;
> > + if (all_hub) {
> > + for_each_set_bit(i, adev->vmhubs_mask,
> > + AMDGPU_MAX_VMHUBS) {
> > + /*
> > + * For gfx12, the index i from vmhubs_mask matchs AMDGPU_MES_HUB_ID,
> > + * Need to convert them if they are not match in future asic
> > + */
>
> Are you sure about this? From above:
>
> > + AMDGPU_MES_GC_HUB = 0,
> > + AMDGPU_MES_MM_HUB0 = 1,
> > + AMDGPU_MES_MM_HUB1 = 2,
>
> And then in amdgpu_vm.h:
>
> /*
> * max number of VMHUB
> * layout: max 8 GFXHUB + 4 MMHUB0 + 1 MMHUB1 */
> #define AMDGPU_MAX_VMHUBS 13
> #define AMDGPU_GFXHUB_START 0
> #define AMDGPU_MMHUB0_START 8
> #define AMDGPU_MMHUB1_START 12
> #define AMDGPU_GFXHUB(x) (AMDGPU_GFXHUB_START + (x))
> #define AMDGPU_MMHUB0(x) (AMDGPU_MMHUB0_START + (x))
> #define AMDGPU_MMHUB1(x) (AMDGPU_MMHUB1_START + (x))
>
> Alex
>
> > + input.hub_id = i;
> > + adev->mes.funcs->invalidate_tlbs_pasid(&adev->mes, &input);
> > + }
> > + } else {
> > + adev->mes.funcs->invalidate_tlbs_pasid(&adev->mes, &input);
> > + }
> > + return;
> > + }
> > +
> > for (vmid = 1; vmid < 16; vmid++) {
> > bool valid;
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
> > b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
> > index 6b222630f3fa..bcaaccf28765 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
> > @@ -108,6 +108,7 @@ static const char *mes_v12_0_opcodes[] = {
> > "SET_SE_MODE",
> > "SET_GANG_SUBMIT",
> > "SET_HW_RSRC_1",
> > + "INVALIDATE_TLBS",
> > };
> >
> > static const char *mes_v12_0_misc_opcodes[] = { @@ -879,6 +880,29 @@
> > static int mes_v12_0_reset_hw_queue(struct amdgpu_mes *mes,
> > offsetof(union MESAPI__RESET, api_status)); }
> >
> > +static int mes_v12_0_inv_tlbs_pasid(struct amdgpu_mes *mes,
> > + struct mes_inv_tlbs_pasid_input
> > +*input) {
> > + union MESAPI__INV_TLBS mes_inv_tlbs;
> > +
> > + memset(&mes_inv_tlbs, 0, sizeof(mes_inv_tlbs));
> > +
> > + mes_inv_tlbs.header.type = MES_API_TYPE_SCHEDULER;
> > + mes_inv_tlbs.header.opcode = MES_SCH_API_INV_TLBS;
> > + mes_inv_tlbs.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
> > +
> > + mes_inv_tlbs.invalidate_tlbs.inv_sel = 0;
> > + mes_inv_tlbs.invalidate_tlbs.flush_type = input->flush_type;
> > + mes_inv_tlbs.invalidate_tlbs.inv_sel_id = input->pasid;
> > + /*The AMDGPU_MES_HUB_ID from input matchs mes expection on gfx12*/
> > + mes_inv_tlbs.invalidate_tlbs.hub_id = (uint32_t)input->hub_id;
> > +
> > + return mes_v12_0_submit_pkt_and_poll_completion(mes, AMDGPU_MES_KIQ_PIPE,
> > + &mes_inv_tlbs, sizeof(mes_inv_tlbs),
> > + offsetof(union MESAPI__INV_TLBS, api_status));
> > +
> > +}
> > +
> > static const struct amdgpu_mes_funcs mes_v12_0_funcs = {
> > .add_hw_queue = mes_v12_0_add_hw_queue,
> > .remove_hw_queue = mes_v12_0_remove_hw_queue, @@ -888,6 +912,7
> > @@ static const struct amdgpu_mes_funcs mes_v12_0_funcs = {
> > .resume_gang = mes_v12_0_resume_gang,
> > .misc_op = mes_v12_0_misc_op,
> > .reset_hw_queue = mes_v12_0_reset_hw_queue,
> > + .invalidate_tlbs_pasid = mes_v12_0_inv_tlbs_pasid,
> > };
> >
> > static int mes_v12_0_allocate_ucode_buffer(struct amdgpu_device
> > *adev,
> > --
> > 2.34.1
> >
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH 2/2] drm/amd/amdgpu : Use the MES INV_TLBS API for tlb invalidation on gfx12
@ 2025-08-21 15:20 Shaoyun Liu
2025-08-22 14:30 ` Alex Deucher
0 siblings, 1 reply; 15+ messages in thread
From: Shaoyun Liu @ 2025-08-21 15:20 UTC (permalink / raw)
To: amd-gfx; +Cc: Shaoyun Liu
Starting with MES version 0x81, MES provides a new INV_TLBS API that supports
invalidating TLBs by PASID.
Signed-off-by: Shaoyun Liu <shaoyun.liu@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 27 ++++++++++++++++
drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c | 20 ++++++++++++
drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 43 +++++++++++++++++++++++++
3 files changed, 90 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index c0d2c195fe2e..46235b8726f3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -280,6 +280,30 @@ struct mes_reset_queue_input {
bool is_kq;
};
+enum amdgpu_mes_hub_id {
+ AMDGPU_MES_GC_HUB0 = 0,
+ AMDGPU_MES_GC_HUB1 = 1,
+ AMDGPU_MES_GC_HUB2 = 2,
+ AMDGPU_MES_GC_HUB3 = 3,
+ AMDGPU_MES_GC_HUB4 = 4,
+ AMDGPU_MES_GC_HUB5 = 5,
+ AMDGPU_MES_GC_HUB6 = 6,
+ AMDGPU_MES_GC_HUB7 = 7,
+ AMDGPU_MES_MM_HUB00 = 8,
+ AMDGPU_MES_MM_HUB01 = 9,
+ AMDGPU_MES_MM_HUB02= 10,
+ AMDGPU_MES_MM_HUB03 = 11,
+ AMDGPU_MES_MM_HUB10 = 12,
+ AMDGPU_MES_MM_HUB11 = 13,
+};
+
+struct mes_inv_tlbs_pasid_input {
+ uint32_t xcc_id;
+ uint16_t pasid;
+ uint8_t hub_id;
+ uint8_t flush_type;
+};
+
enum mes_misc_opcode {
MES_MISC_OP_WRITE_REG,
MES_MISC_OP_READ_REG,
@@ -367,6 +391,9 @@ struct amdgpu_mes_funcs {
int (*reset_hw_queue)(struct amdgpu_mes *mes,
struct mes_reset_queue_input *input);
+
+ int (*invalidate_tlbs_pasid)(struct amdgpu_mes *mes,
+ struct mes_inv_tlbs_pasid_input *input);
};
#define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev))
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
index feb92e107af8..b5be6c7838aa 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
@@ -339,6 +339,26 @@ static void gmc_v12_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
uint16_t queried;
int vmid, i;
+ if (adev->enable_uni_mes && adev->mes.ring[AMDGPU_MES_SCHED_PIPE].sched.ready &&
+ (adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x81) {
+ struct mes_inv_tlbs_pasid_input input = {0};
+ input.pasid = pasid;
+ input.flush_type = flush_type;
+ input.hub_id = AMDGPU_GFXHUB(0);
+ /* MES will invalidate all gc_hub for the device from master */
+ adev->mes.funcs->invalidate_tlbs_pasid(&adev->mes, &input);
+ if (all_hub) {
+ /* Only need to invalidate mm_hub now */
+ for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) {
+ if (i < AMDGPU_MMHUB0_START)
+ continue;
+ input.hub_id = i;
+ adev->mes.funcs->invalidate_tlbs_pasid(&adev->mes, &input);
+ }
+ }
+ return;
+ }
+
for (vmid = 1; vmid < 16; vmid++) {
bool valid;
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
index 6b222630f3fa..6740383f7721 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
@@ -108,6 +108,7 @@ static const char *mes_v12_0_opcodes[] = {
"SET_SE_MODE",
"SET_GANG_SUBMIT",
"SET_HW_RSRC_1",
+ "INVALIDATE_TLBS",
};
static const char *mes_v12_0_misc_opcodes[] = {
@@ -879,6 +880,47 @@ static int mes_v12_0_reset_hw_queue(struct amdgpu_mes *mes,
offsetof(union MESAPI__RESET, api_status));
}
+static int mes_v12_inv_tlb_convert_hub_id(enum amdgpu_mes_hub_id id)
+{
+ int mes_hub_id = id;
+ /*
+ * MES doesn't support invalidate gc_hub on slave xcc individually
+ * master xcc will invalidate all gc_hub for the partition
+ */
+ if (id == 0)
+ return 0;
+ if (id < AMDGPU_MMHUB0_START)
+ return -EINVAL;
+
+ mes_hub_id -= AMDGPU_MMHUB0_START - 1;
+ /* gfx12 only support maximum one mmhub */
+ return (mes_hub_id > 1) ? -EINVAL: mes_hub_id;
+}
+static int mes_v12_0_inv_tlbs_pasid(struct amdgpu_mes *mes,
+ struct mes_inv_tlbs_pasid_input *input)
+{
+ union MESAPI__INV_TLBS mes_inv_tlbs;
+
+ memset(&mes_inv_tlbs, 0, sizeof(mes_inv_tlbs));
+
+ mes_inv_tlbs.header.type = MES_API_TYPE_SCHEDULER;
+ mes_inv_tlbs.header.opcode = MES_SCH_API_INV_TLBS;
+ mes_inv_tlbs.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ mes_inv_tlbs.invalidate_tlbs.inv_sel = 0;
+ mes_inv_tlbs.invalidate_tlbs.flush_type = input->flush_type;
+ mes_inv_tlbs.invalidate_tlbs.inv_sel_id = input->pasid;
+
+ /*convert amdgpu_mes_hub_id to mes expected hub_id */
+ mes_inv_tlbs.invalidate_tlbs.hub_id = mes_v12_inv_tlb_convert_hub_id(input->hub_id);
+ if (mes_inv_tlbs.invalidate_tlbs.hub_id < 0)
+ return -EINVAL;
+ return mes_v12_0_submit_pkt_and_poll_completion(mes, AMDGPU_MES_KIQ_PIPE,
+ &mes_inv_tlbs, sizeof(mes_inv_tlbs),
+ offsetof(union MESAPI__INV_TLBS, api_status));
+
+}
+
static const struct amdgpu_mes_funcs mes_v12_0_funcs = {
.add_hw_queue = mes_v12_0_add_hw_queue,
.remove_hw_queue = mes_v12_0_remove_hw_queue,
@@ -888,6 +930,7 @@ static const struct amdgpu_mes_funcs mes_v12_0_funcs = {
.resume_gang = mes_v12_0_resume_gang,
.misc_op = mes_v12_0_misc_op,
.reset_hw_queue = mes_v12_0_reset_hw_queue,
+ .invalidate_tlbs_pasid = mes_v12_0_inv_tlbs_pasid,
};
static int mes_v12_0_allocate_ucode_buffer(struct amdgpu_device *adev,
--
2.34.1
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [PATCH 2/2] drm/amd/amdgpu : Use the MES INV_TLBS API for tlb invalidation on gfx12
2025-08-21 15:20 Shaoyun Liu
@ 2025-08-22 14:30 ` Alex Deucher
0 siblings, 0 replies; 15+ messages in thread
From: Alex Deucher @ 2025-08-22 14:30 UTC (permalink / raw)
To: Shaoyun Liu; +Cc: amd-gfx
On Thu, Aug 21, 2025 at 11:28 AM Shaoyun Liu <shaoyun.liu@amd.com> wrote:
>
> Starting with MES version 0x81, MES provides the new INV_TLBS API, which
> supports invalidating TLBs by PASID.
>
> Signed-off-by: Shaoyun Liu <shaoyun.liu@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 27 ++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c | 20 ++++++++++++
> drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 43 +++++++++++++++++++++++++
> 3 files changed, 90 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> index c0d2c195fe2e..46235b8726f3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> @@ -280,6 +280,30 @@ struct mes_reset_queue_input {
> bool is_kq;
> };
>
> +enum amdgpu_mes_hub_id {
> + AMDGPU_MES_GC_HUB0 = 0,
> + AMDGPU_MES_GC_HUB1 = 1,
> + AMDGPU_MES_GC_HUB2 = 2,
> + AMDGPU_MES_GC_HUB3 = 3,
> + AMDGPU_MES_GC_HUB4 = 4,
> + AMDGPU_MES_GC_HUB5 = 5,
> + AMDGPU_MES_GC_HUB6 = 6,
> + AMDGPU_MES_GC_HUB7 = 7,
> + AMDGPU_MES_MM_HUB00 = 8,
> + AMDGPU_MES_MM_HUB01 = 9,
> + AMDGPU_MES_MM_HUB02= 10,
> + AMDGPU_MES_MM_HUB03 = 11,
> + AMDGPU_MES_MM_HUB10 = 12,
> + AMDGPU_MES_MM_HUB11 = 13,
> +};
If these are the same as the hub definitions in amdgpu_vm.h you can
just use those directly and skip these.
> +
> +struct mes_inv_tlbs_pasid_input {
> + uint32_t xcc_id;
> + uint16_t pasid;
> + uint8_t hub_id;
> + uint8_t flush_type;
> +};
> +
> enum mes_misc_opcode {
> MES_MISC_OP_WRITE_REG,
> MES_MISC_OP_READ_REG,
> @@ -367,6 +391,9 @@ struct amdgpu_mes_funcs {
>
> int (*reset_hw_queue)(struct amdgpu_mes *mes,
> struct mes_reset_queue_input *input);
> +
> + int (*invalidate_tlbs_pasid)(struct amdgpu_mes *mes,
> + struct mes_inv_tlbs_pasid_input *input);
> };
>
> #define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev))
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
> index feb92e107af8..b5be6c7838aa 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
> @@ -339,6 +339,26 @@ static void gmc_v12_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> uint16_t queried;
> int vmid, i;
>
> + if (adev->enable_uni_mes && adev->mes.ring[AMDGPU_MES_SCHED_PIPE].sched.ready &&
> + (adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x81) {
> + struct mes_inv_tlbs_pasid_input input = {0};
> + input.pasid = pasid;
> + input.flush_type = flush_type;
> + input.hub_id = AMDGPU_GFXHUB(0);
> + /* MES will invalidate all gc_hub for the device from master */
> + adev->mes.funcs->invalidate_tlbs_pasid(&adev->mes, &input);
> + if (all_hub) {
> + /* Only need to invalidate mm_hub now */
> + for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) {
> + if (i < AMDGPU_MMHUB0_START)
> + continue;
I think you can drop the loop here and just have:
input.hub_id = AMDGPU_MMHUB(0);
adev->mes.funcs->invalidate_tlbs_pasid(&adev->mes, &input);
in the all_hub case since gfx12 only supports one mmhub as well.
> + input.hub_id = i;
> + adev->mes.funcs->invalidate_tlbs_pasid(&adev->mes, &input);
> + }
> + }
> + return;
> + }
> +
> for (vmid = 1; vmid < 16; vmid++) {
> bool valid;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
> index 6b222630f3fa..6740383f7721 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
> @@ -108,6 +108,7 @@ static const char *mes_v12_0_opcodes[] = {
> "SET_SE_MODE",
> "SET_GANG_SUBMIT",
> "SET_HW_RSRC_1",
> + "INVALIDATE_TLBS",
> };
>
> static const char *mes_v12_0_misc_opcodes[] = {
> @@ -879,6 +880,47 @@ static int mes_v12_0_reset_hw_queue(struct amdgpu_mes *mes,
> offsetof(union MESAPI__RESET, api_status));
> }
>
> +static int mes_v12_inv_tlb_convert_hub_id(enum amdgpu_mes_hub_id id)
> +{
> + int mes_hub_id = id;
> + /*
> + * MES doesn't support invalidate gc_hub on slave xcc individually
> + * master xcc will invalidate all gc_hub for the partition
> + */
> + if (id == 0)
> + return 0;
> + if (id < AMDGPU_MMHUB0_START)
> + return -EINVAL;
> +
> + mes_hub_id -= AMDGPU_MMHUB0_START - 1;
> + /* gfx12 only support maximum one mmhub */
> + return (mes_hub_id > 1) ? -EINVAL: mes_hub_id;
It would be cleaner to do something like this:
if (AMDGPU_IS_GFXHUB(id))
return 0;
else if (AMDGPU_IS_MMHUB0(id))
return 1;
else
return -EINVAL;
> +}
Add a blank line here.
Alex
> +static int mes_v12_0_inv_tlbs_pasid(struct amdgpu_mes *mes,
> + struct mes_inv_tlbs_pasid_input *input)
> +{
> + union MESAPI__INV_TLBS mes_inv_tlbs;
> +
> + memset(&mes_inv_tlbs, 0, sizeof(mes_inv_tlbs));
> +
> + mes_inv_tlbs.header.type = MES_API_TYPE_SCHEDULER;
> + mes_inv_tlbs.header.opcode = MES_SCH_API_INV_TLBS;
> + mes_inv_tlbs.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
> +
> + mes_inv_tlbs.invalidate_tlbs.inv_sel = 0;
> + mes_inv_tlbs.invalidate_tlbs.flush_type = input->flush_type;
> + mes_inv_tlbs.invalidate_tlbs.inv_sel_id = input->pasid;
> +
> + /*convert amdgpu_mes_hub_id to mes expected hub_id */
> + mes_inv_tlbs.invalidate_tlbs.hub_id = mes_v12_inv_tlb_convert_hub_id(input->hub_id);
> + if (mes_inv_tlbs.invalidate_tlbs.hub_id < 0)
> + return -EINVAL;
> + return mes_v12_0_submit_pkt_and_poll_completion(mes, AMDGPU_MES_KIQ_PIPE,
> + &mes_inv_tlbs, sizeof(mes_inv_tlbs),
> + offsetof(union MESAPI__INV_TLBS, api_status));
> +
> +}
> +
> static const struct amdgpu_mes_funcs mes_v12_0_funcs = {
> .add_hw_queue = mes_v12_0_add_hw_queue,
> .remove_hw_queue = mes_v12_0_remove_hw_queue,
> @@ -888,6 +930,7 @@ static const struct amdgpu_mes_funcs mes_v12_0_funcs = {
> .resume_gang = mes_v12_0_resume_gang,
> .misc_op = mes_v12_0_misc_op,
> .reset_hw_queue = mes_v12_0_reset_hw_queue,
> + .invalidate_tlbs_pasid = mes_v12_0_inv_tlbs_pasid,
> };
>
> static int mes_v12_0_allocate_ucode_buffer(struct amdgpu_device *adev,
> --
> 2.34.1
>
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH 2/2] drm/amd/amdgpu : Use the MES INV_TLBS API for tlb invalidation on gfx12
@ 2025-08-22 15:02 Shaoyun Liu
2025-08-22 15:12 ` Alex Deucher
0 siblings, 1 reply; 15+ messages in thread
From: Shaoyun Liu @ 2025-08-22 15:02 UTC (permalink / raw)
To: amd-gfx; +Cc: Shaoyun Liu
Starting with MES version 0x81, MES provides the new INV_TLBS API, which
supports invalidating TLBs by PASID.
Signed-off-by: Shaoyun Liu <shaoyun.liu@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 10 ++++++
drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c | 16 +++++++++
drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 43 +++++++++++++++++++++++++
3 files changed, 69 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index c0d2c195fe2e..489a4a0f0610 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -280,6 +280,13 @@ struct mes_reset_queue_input {
bool is_kq;
};
+struct mes_inv_tlbs_pasid_input {
+ uint32_t xcc_id;
+ uint16_t pasid;
+ uint8_t hub_id;
+ uint8_t flush_type;
+};
+
enum mes_misc_opcode {
MES_MISC_OP_WRITE_REG,
MES_MISC_OP_READ_REG,
@@ -367,6 +374,9 @@ struct amdgpu_mes_funcs {
int (*reset_hw_queue)(struct amdgpu_mes *mes,
struct mes_reset_queue_input *input);
+
+ int (*invalidate_tlbs_pasid)(struct amdgpu_mes *mes,
+ struct mes_inv_tlbs_pasid_input *input);
};
#define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev))
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
index feb92e107af8..eed62e9a9b96 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
@@ -339,6 +339,22 @@ static void gmc_v12_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
uint16_t queried;
int vmid, i;
+ if (adev->enable_uni_mes && adev->mes.ring[AMDGPU_MES_SCHED_PIPE].sched.ready &&
+ (adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x81) {
+ struct mes_inv_tlbs_pasid_input input = {0};
+ input.pasid = pasid;
+ input.flush_type = flush_type;
+ input.hub_id = AMDGPU_GFXHUB(0);
+ /* MES will invalidate all gc_hub for the device from master */
+ adev->mes.funcs->invalidate_tlbs_pasid(&adev->mes, &input);
+ if (all_hub) {
+ /* Only need to invalidate mm_hub now, gfx12 only support one mmhub */
+ input.hub_id = AMDGPU_MMHUB(0);
+ adev->mes.funcs->invalidate_tlbs_pasid(&adev->mes, &input);
+ }
+ return;
+ }
+
for (vmid = 1; vmid < 16; vmid++) {
bool valid;
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
index 6b222630f3fa..d0b5deed245d 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
@@ -108,6 +108,7 @@ static const char *mes_v12_0_opcodes[] = {
"SET_SE_MODE",
"SET_GANG_SUBMIT",
"SET_HW_RSRC_1",
+ "INVALIDATE_TLBS",
};
static const char *mes_v12_0_misc_opcodes[] = {
@@ -879,6 +880,47 @@ static int mes_v12_0_reset_hw_queue(struct amdgpu_mes *mes,
offsetof(union MESAPI__RESET, api_status));
}
+static int mes_v12_inv_tlb_convert_hub_id(enum amdgpu_mes_hub_id id)
+{
+ int mes_hub_id = id;
+ /*
+ * MES doesn't support invalidate gc_hub on slave xcc individually
+ * master xcc will invalidate all gc_hub for the partition
+ */
+ if (AMDGPU_IS_GFXHUB(id))
+ return 0;
+ else if (AMDGPU_IS_MMHUB0(id))
+ return 1;
+ else
+ return -EINVAL;
+
+}
+
+static int mes_v12_0_inv_tlbs_pasid(struct amdgpu_mes *mes,
+ struct mes_inv_tlbs_pasid_input *input)
+{
+ union MESAPI__INV_TLBS mes_inv_tlbs;
+
+ memset(&mes_inv_tlbs, 0, sizeof(mes_inv_tlbs));
+
+ mes_inv_tlbs.header.type = MES_API_TYPE_SCHEDULER;
+ mes_inv_tlbs.header.opcode = MES_SCH_API_INV_TLBS;
+ mes_inv_tlbs.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ mes_inv_tlbs.invalidate_tlbs.inv_sel = 0;
+ mes_inv_tlbs.invalidate_tlbs.flush_type = input->flush_type;
+ mes_inv_tlbs.invalidate_tlbs.inv_sel_id = input->pasid;
+
+ /*convert amdgpu_mes_hub_id to mes expected hub_id */
+ mes_inv_tlbs.invalidate_tlbs.hub_id = mes_v12_inv_tlb_convert_hub_id(input->hub_id);
+ if (mes_inv_tlbs.invalidate_tlbs.hub_id < 0)
+ return -EINVAL;
+ return mes_v12_0_submit_pkt_and_poll_completion(mes, AMDGPU_MES_KIQ_PIPE,
+ &mes_inv_tlbs, sizeof(mes_inv_tlbs),
+ offsetof(union MESAPI__INV_TLBS, api_status));
+
+}
+
static const struct amdgpu_mes_funcs mes_v12_0_funcs = {
.add_hw_queue = mes_v12_0_add_hw_queue,
.remove_hw_queue = mes_v12_0_remove_hw_queue,
@@ -888,6 +930,7 @@ static const struct amdgpu_mes_funcs mes_v12_0_funcs = {
.resume_gang = mes_v12_0_resume_gang,
.misc_op = mes_v12_0_misc_op,
.reset_hw_queue = mes_v12_0_reset_hw_queue,
+ .invalidate_tlbs_pasid = mes_v12_0_inv_tlbs_pasid,
};
static int mes_v12_0_allocate_ucode_buffer(struct amdgpu_device *adev,
--
2.34.1
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [PATCH 2/2] drm/amd/amdgpu : Use the MES INV_TLBS API for tlb invalidation on gfx12
2025-08-22 15:02 [PATCH 2/2] drm/amd/amdgpu : Use the MES INV_TLBS API for tlb invalidation on gfx12 Shaoyun Liu
@ 2025-08-22 15:12 ` Alex Deucher
0 siblings, 0 replies; 15+ messages in thread
From: Alex Deucher @ 2025-08-22 15:12 UTC (permalink / raw)
To: Shaoyun Liu; +Cc: amd-gfx
On Fri, Aug 22, 2025 at 11:02 AM Shaoyun Liu <shaoyun.liu@amd.com> wrote:
>
> Starting with MES version 0x81, MES provides the new INV_TLBS API, which
> supports invalidating TLBs by PASID.
>
> Signed-off-by: Shaoyun Liu <shaoyun.liu@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 10 ++++++
> drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c | 16 +++++++++
> drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 43 +++++++++++++++++++++++++
> 3 files changed, 69 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> index c0d2c195fe2e..489a4a0f0610 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> @@ -280,6 +280,13 @@ struct mes_reset_queue_input {
> bool is_kq;
> };
>
> +struct mes_inv_tlbs_pasid_input {
> + uint32_t xcc_id;
> + uint16_t pasid;
> + uint8_t hub_id;
> + uint8_t flush_type;
> +};
> +
> enum mes_misc_opcode {
> MES_MISC_OP_WRITE_REG,
> MES_MISC_OP_READ_REG,
> @@ -367,6 +374,9 @@ struct amdgpu_mes_funcs {
>
> int (*reset_hw_queue)(struct amdgpu_mes *mes,
> struct mes_reset_queue_input *input);
> +
> + int (*invalidate_tlbs_pasid)(struct amdgpu_mes *mes,
> + struct mes_inv_tlbs_pasid_input *input);
> };
>
> #define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev))
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
> index feb92e107af8..eed62e9a9b96 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
> @@ -339,6 +339,22 @@ static void gmc_v12_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> uint16_t queried;
> int vmid, i;
>
> + if (adev->enable_uni_mes && adev->mes.ring[AMDGPU_MES_SCHED_PIPE].sched.ready &&
> + (adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x81) {
> + struct mes_inv_tlbs_pasid_input input = {0};
> + input.pasid = pasid;
> + input.flush_type = flush_type;
> + input.hub_id = AMDGPU_GFXHUB(0);
> + /* MES will invalidate all gc_hub for the device from master */
> + adev->mes.funcs->invalidate_tlbs_pasid(&adev->mes, &input);
> + if (all_hub) {
> + /* Only need to invalidate mm_hub now, gfx12 only support one mmhub */
> + input.hub_id = AMDGPU_MMHUB(0);
> + adev->mes.funcs->invalidate_tlbs_pasid(&adev->mes, &input);
> + }
> + return;
> + }
> +
> for (vmid = 1; vmid < 16; vmid++) {
> bool valid;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
> index 6b222630f3fa..d0b5deed245d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
> @@ -108,6 +108,7 @@ static const char *mes_v12_0_opcodes[] = {
> "SET_SE_MODE",
> "SET_GANG_SUBMIT",
> "SET_HW_RSRC_1",
> + "INVALIDATE_TLBS",
> };
>
> static const char *mes_v12_0_misc_opcodes[] = {
> @@ -879,6 +880,47 @@ static int mes_v12_0_reset_hw_queue(struct amdgpu_mes *mes,
> offsetof(union MESAPI__RESET, api_status));
> }
>
> +static int mes_v12_inv_tlb_convert_hub_id(enum amdgpu_mes_hub_id id)
> +{
> + int mes_hub_id = id;
Leftover unused variable. With that dropped, the patch is:
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
> + /*
> + * MES doesn't support invalidate gc_hub on slave xcc individually
> + * master xcc will invalidate all gc_hub for the partition
> + */
> + if (AMDGPU_IS_GFXHUB(id))
> + return 0;
> + else if (AMDGPU_IS_MMHUB0(id))
> + return 1;
> + else
> + return -EINVAL;
> +
> +}
> +
> +static int mes_v12_0_inv_tlbs_pasid(struct amdgpu_mes *mes,
> + struct mes_inv_tlbs_pasid_input *input)
> +{
> + union MESAPI__INV_TLBS mes_inv_tlbs;
> +
> + memset(&mes_inv_tlbs, 0, sizeof(mes_inv_tlbs));
> +
> + mes_inv_tlbs.header.type = MES_API_TYPE_SCHEDULER;
> + mes_inv_tlbs.header.opcode = MES_SCH_API_INV_TLBS;
> + mes_inv_tlbs.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
> +
> + mes_inv_tlbs.invalidate_tlbs.inv_sel = 0;
> + mes_inv_tlbs.invalidate_tlbs.flush_type = input->flush_type;
> + mes_inv_tlbs.invalidate_tlbs.inv_sel_id = input->pasid;
> +
> + /*convert amdgpu_mes_hub_id to mes expected hub_id */
> + mes_inv_tlbs.invalidate_tlbs.hub_id = mes_v12_inv_tlb_convert_hub_id(input->hub_id);
> + if (mes_inv_tlbs.invalidate_tlbs.hub_id < 0)
> + return -EINVAL;
> + return mes_v12_0_submit_pkt_and_poll_completion(mes, AMDGPU_MES_KIQ_PIPE,
> + &mes_inv_tlbs, sizeof(mes_inv_tlbs),
> + offsetof(union MESAPI__INV_TLBS, api_status));
> +
> +}
> +
> static const struct amdgpu_mes_funcs mes_v12_0_funcs = {
> .add_hw_queue = mes_v12_0_add_hw_queue,
> .remove_hw_queue = mes_v12_0_remove_hw_queue,
> @@ -888,6 +930,7 @@ static const struct amdgpu_mes_funcs mes_v12_0_funcs = {
> .resume_gang = mes_v12_0_resume_gang,
> .misc_op = mes_v12_0_misc_op,
> .reset_hw_queue = mes_v12_0_reset_hw_queue,
> + .invalidate_tlbs_pasid = mes_v12_0_inv_tlbs_pasid,
> };
>
> static int mes_v12_0_allocate_ucode_buffer(struct amdgpu_device *adev,
> --
> 2.34.1
>
^ permalink raw reply [flat|nested] 15+ messages in thread
end of thread, other threads:[~2025-08-22 15:12 UTC | newest]
Thread overview: 15+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2025-08-22 15:02 [PATCH 2/2] drm/amd/amdgpu : Use the MES INV_TLBS API for tlb invalidation on gfx12 Shaoyun Liu
2025-08-22 15:12 ` Alex Deucher
-- strict thread matches above, loose matches on Subject: below --
2025-08-21 15:20 Shaoyun Liu
2025-08-22 14:30 ` Alex Deucher
2025-08-20 18:47 Shaoyun Liu
2025-08-20 19:57 ` Alex Deucher
2025-08-21 2:27 ` Liu, Shaoyun
2025-08-21 12:48 ` Alex Deucher
2025-08-06 18:32 Shaoyun Liu
2025-08-06 14:50 Shaoyun Liu
2025-07-30 14:27 Shaoyun Liu
2025-08-06 17:44 ` Alex Deucher
2025-08-06 18:02 ` Liu, Shaoyun
2025-08-06 18:05 ` Alex Deucher
2025-08-06 18:27 ` Liu, Shaoyun
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).