From: Amber Lin <Amber.Lin@amd.com>
To: <amd-gfx@lists.freedesktop.org>
Cc: <Shaoyun.Liu@amd.com>, <Michael.Chen@amd.com>,
<Jesse.Zhang@amd.com>, Amber Lin <Amber.Lin@amd.com>
Subject: [PATCH 5/8] drm/amdgpu: Missing multi-XCC support in MES
Date: Fri, 20 Mar 2026 16:02:05 -0400 [thread overview]
Message-ID: <20260320200208.1188307-6-Amber.Lin@amd.com> (raw)
In-Reply-To: <20260320200208.1188307-1-Amber.Lin@amd.com>
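In a multi-XCC GPU, pass the master XCC's ID to amdgpu_mes_suspend,
amdgpu_mes_resume, and detect_and_reset_hung_queues so the command will be
sent to the matching master MES when the compute partition mode is not
SPX. Also drop the amdgpu_mes_suspend/amdgpu_mes_resume calls from
mes_v12_1_suspend and mes_v12_1_resume, which now only call
mes_v12_1_hw_fini and mes_v12_1_hw_init respectively.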
Signed-off-by: Amber Lin <Amber.Lin@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 7 +++++--
drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 9 +++++----
drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 2 +-
drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 2 +-
drivers/gpu/drm/amd/amdgpu/mes_v12_1.c | 14 +-------------
.../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 4 ++--
6 files changed, 15 insertions(+), 23 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index 710bca87c32b..4f44b933e373 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -300,7 +300,7 @@ void amdgpu_mes_fini(struct amdgpu_device *adev)
mutex_destroy(&adev->mes.mutex_hidden);
}
-int amdgpu_mes_suspend(struct amdgpu_device *adev)
+int amdgpu_mes_suspend(struct amdgpu_device *adev, uint32_t xcc_id)
{
struct mes_suspend_gang_input input;
int r;
@@ -310,6 +310,7 @@ int amdgpu_mes_suspend(struct amdgpu_device *adev)
memset(&input, 0x0, sizeof(struct mes_suspend_gang_input));
input.suspend_all_gangs = 1;
+ input.xcc_id = xcc_id;
/*
* Avoid taking any other locks under MES lock to avoid circular
@@ -324,7 +325,7 @@ int amdgpu_mes_suspend(struct amdgpu_device *adev)
return r;
}
-int amdgpu_mes_resume(struct amdgpu_device *adev)
+int amdgpu_mes_resume(struct amdgpu_device *adev, uint32_t xcc_id)
{
struct mes_resume_gang_input input;
int r;
@@ -334,6 +335,7 @@ int amdgpu_mes_resume(struct amdgpu_device *adev)
memset(&input, 0x0, sizeof(struct mes_resume_gang_input));
input.resume_all_gangs = 1;
+ input.xcc_id = xcc_id;
/*
* Avoid taking any other locks under MES lock to avoid circular
@@ -462,6 +464,7 @@ int amdgpu_mes_detect_and_reset_hung_queues(struct amdgpu_device *adev,
adev->mes.hung_queue_db_array_size * sizeof(u32));
input.queue_type = queue_type;
input.detect_only = detect_only;
+ input.xcc_id = xcc_id;
r = adev->mes.funcs->detect_and_reset_hung_queues(&adev->mes,
&input);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index 2e6ae9f84db0..643b4f8d757a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -325,8 +325,9 @@ struct mes_reset_queue_input {
};
struct mes_detect_and_reset_queue_input {
- uint32_t queue_type;
- bool detect_only;
+ uint32_t queue_type;
+ bool detect_only;
+ uint32_t xcc_id;
};
struct mes_inv_tlbs_pasid_input {
@@ -442,8 +443,8 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe);
int amdgpu_mes_init(struct amdgpu_device *adev);
void amdgpu_mes_fini(struct amdgpu_device *adev);
-int amdgpu_mes_suspend(struct amdgpu_device *adev);
-int amdgpu_mes_resume(struct amdgpu_device *adev);
+int amdgpu_mes_suspend(struct amdgpu_device *adev, uint32_t xcc_id);
+int amdgpu_mes_resume(struct amdgpu_device *adev, uint32_t xcc_id);
int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev,
struct amdgpu_ring *ring, uint32_t xcc_id);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 78d1f3eb522e..35734d34763a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -5200,7 +5200,7 @@ static int gfx_v11_0_post_soft_reset(struct amdgpu_ip_block *ip_block)
/**
* GFX soft reset will impact MES, need resume MES when do GFX soft reset
*/
- return amdgpu_mes_resume(adev);
+ return amdgpu_mes_resume(adev, 0);
}
static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
index 9508709abd49..d02a84711394 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
@@ -266,7 +266,7 @@ static int mes_userq_detect_and_reset(struct amdgpu_device *adev,
if (found_hung_queue) {
/* Resume scheduling after hang recovery */
- r = amdgpu_mes_resume(adev);
+ r = amdgpu_mes_resume(adev, input.xcc_id);
}
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c
index 70d80c2aed52..4b279259b9d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c
@@ -1888,24 +1888,12 @@ static int mes_v12_1_hw_fini(struct amdgpu_ip_block *ip_block)
static int mes_v12_1_suspend(struct amdgpu_ip_block *ip_block)
{
- int r;
-
- r = amdgpu_mes_suspend(ip_block->adev);
- if (r)
- return r;
-
return mes_v12_1_hw_fini(ip_block);
}
static int mes_v12_1_resume(struct amdgpu_ip_block *ip_block)
{
- int r;
-
- r = mes_v12_1_hw_init(ip_block);
- if (r)
- return r;
-
- return amdgpu_mes_resume(ip_block->adev);
+ return mes_v12_1_hw_init(ip_block);
}
static int mes_v12_1_early_init(struct amdgpu_ip_block *ip_block)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 18bc5ba25f8f..ec8d7f4be840 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -367,7 +367,7 @@ static int suspend_all_queues_mes(struct device_queue_manager *dqm)
if (!down_read_trylock(&adev->reset_domain->sem))
return -EIO;
- r = amdgpu_mes_suspend(adev);
+ r = amdgpu_mes_suspend(adev, ffs(dqm->dev->xcc_mask) - 1);
up_read(&adev->reset_domain->sem);
if (r) {
@@ -387,7 +387,7 @@ static int resume_all_queues_mes(struct device_queue_manager *dqm)
if (!down_read_trylock(&adev->reset_domain->sem))
return -EIO;
- r = amdgpu_mes_resume(adev);
+ r = amdgpu_mes_resume(adev, ffs(dqm->dev->xcc_mask) - 1);
up_read(&adev->reset_domain->sem);
if (r) {
--
2.43.0
next prev parent reply other threads:[~2026-03-20 20:02 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-20 20:02 [PATCH 0/8] Support compute queue/pipe reset on gfx 12.1 Amber Lin
2026-03-20 20:02 ` [PATCH 1/8] drm/amdgpu: Fix gfx_hqd_mask in mes 12.1 Amber Lin
2026-03-23 19:03 ` Alex Deucher
2026-03-20 20:02 ` [PATCH 2/8] drm/amdgpu: Fixup boost mes detect hang array size Amber Lin
2026-03-23 19:04 ` Alex Deucher
2026-03-23 19:15 ` Amber Lin
2026-03-20 20:02 ` [PATCH 3/8] drm/amdgpu: Fixup detect and reset Amber Lin
2026-03-23 19:07 ` Alex Deucher
2026-03-20 20:02 ` [PATCH 4/8] drm/amdgpu: Create hqd info structure Amber Lin
2026-03-23 19:01 ` Alex Deucher
2026-03-23 19:11 ` Amber Lin
2026-03-20 20:02 ` Amber Lin [this message]
2026-03-23 19:10 ` [PATCH 5/8] drm/amdgpu: Missing multi-XCC support in MES Alex Deucher
2026-03-23 19:19 ` Amber Lin
2026-03-20 20:02 ` [PATCH 6/8] drm/amdgpu: Enable suspend/resume gang in mes 12.1 Amber Lin
2026-03-23 19:11 ` Alex Deucher
2026-03-20 20:02 ` [PATCH 7/8] drm/amdkfd: Add detect+reset hangs to GC 12.1 Amber Lin
2026-03-23 19:12 ` Alex Deucher
2026-03-20 20:02 ` [PATCH 8/8] drm/amdkfd: Reset queue/pipe in MES Amber Lin
2026-03-23 19:21 ` Alex Deucher
2026-03-23 19:42 ` Amber Lin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260320200208.1188307-6-Amber.Lin@amd.com \
--to=amber.lin@amd.com \
--cc=Jesse.Zhang@amd.com \
--cc=Michael.Chen@amd.com \
--cc=Shaoyun.Liu@amd.com \
--cc=amd-gfx@lists.freedesktop.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.