All of lore.kernel.org
 help / color / mirror / Atom feed
From: Amber Lin <Amber.Lin@amd.com>
To: <amd-gfx@lists.freedesktop.org>, <alexdeucher@gmail.com>
Cc: <Shaoyun.Liu@amd.com>, <Michael.Chen@amd.com>,
	<Jesse.Zhang@amd.com>, Amber Lin <Amber.Lin@amd.com>
Subject: [PATCH v2 06/10] drm/amdgpu: Missing multi-XCC support in MES
Date: Tue, 24 Mar 2026 13:56:48 -0400	[thread overview]
Message-ID: <20260324175653.1325754-7-Amber.Lin@amd.com> (raw)
In-Reply-To: <20260324175653.1325754-1-Amber.Lin@amd.com>

In a multi-XCC GPU, pass the master XCC's ID to amdgpu_mes_suspend,
amdgpu_mes_resume, and detect_and_reset_hung_queues so the command will be
sent to the matching master MES when the compute partition mode is not
SPX.

Signed-off-by: Amber Lin <Amber.Lin@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c               | 7 +++++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h               | 9 +++++----
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c                | 2 +-
 drivers/gpu/drm/amd/amdgpu/mes_userqueue.c            | 2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 4 ++--
 5 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index d778c3da8203..f3a4ae1fd521 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -300,7 +300,7 @@ void amdgpu_mes_fini(struct amdgpu_device *adev)
 	mutex_destroy(&adev->mes.mutex_hidden);
 }
 
-int amdgpu_mes_suspend(struct amdgpu_device *adev)
+int amdgpu_mes_suspend(struct amdgpu_device *adev, uint32_t xcc_id)
 {
 	struct mes_suspend_gang_input input;
 	int r;
@@ -310,6 +310,7 @@ int amdgpu_mes_suspend(struct amdgpu_device *adev)
 
 	memset(&input, 0x0, sizeof(struct mes_suspend_gang_input));
 	input.suspend_all_gangs = 1;
+	input.xcc_id = xcc_id;
 
 	/*
 	 * Avoid taking any other locks under MES lock to avoid circular
@@ -324,7 +325,7 @@ int amdgpu_mes_suspend(struct amdgpu_device *adev)
 	return r;
 }
 
-int amdgpu_mes_resume(struct amdgpu_device *adev)
+int amdgpu_mes_resume(struct amdgpu_device *adev, uint32_t xcc_id)
 {
 	struct mes_resume_gang_input input;
 	int r;
@@ -334,6 +335,7 @@ int amdgpu_mes_resume(struct amdgpu_device *adev)
 
 	memset(&input, 0x0, sizeof(struct mes_resume_gang_input));
 	input.resume_all_gangs = 1;
+	input.xcc_id = xcc_id;
 
 	/*
 	 * Avoid taking any other locks under MES lock to avoid circular
@@ -462,6 +464,7 @@ int amdgpu_mes_detect_and_reset_hung_queues(struct amdgpu_device *adev,
 		adev->mes.hung_queue_db_array_size * sizeof(u32));
 	input.queue_type = queue_type;
 	input.detect_only = detect_only;
+	input.xcc_id = xcc_id;
 
 	r = adev->mes.funcs->detect_and_reset_hung_queues(&adev->mes,
 							  &input);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index 2e6ae9f84db0..643b4f8d757a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -325,8 +325,9 @@ struct mes_reset_queue_input {
 };
 
 struct mes_detect_and_reset_queue_input {
-	uint32_t                           queue_type;
-	bool                               detect_only;
+	uint32_t	queue_type;
+	bool		detect_only;
+	uint32_t	xcc_id;
 };
 
 struct mes_inv_tlbs_pasid_input {
@@ -442,8 +443,8 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe);
 int amdgpu_mes_init(struct amdgpu_device *adev);
 void amdgpu_mes_fini(struct amdgpu_device *adev);
 
-int amdgpu_mes_suspend(struct amdgpu_device *adev);
-int amdgpu_mes_resume(struct amdgpu_device *adev);
+int amdgpu_mes_suspend(struct amdgpu_device *adev, uint32_t xcc_id);
+int amdgpu_mes_resume(struct amdgpu_device *adev, uint32_t xcc_id);
 
 int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev,
 				struct amdgpu_ring *ring, uint32_t xcc_id);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 78d1f3eb522e..35734d34763a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -5200,7 +5200,7 @@ static int gfx_v11_0_post_soft_reset(struct amdgpu_ip_block *ip_block)
 	/**
 	 * GFX soft reset will impact MES, need resume MES when do GFX soft reset
 	 */
-	return amdgpu_mes_resume(adev);
+	return amdgpu_mes_resume(adev, 0);
 }
 
 static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
index 9508709abd49..d02a84711394 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
@@ -266,7 +266,7 @@ static int mes_userq_detect_and_reset(struct amdgpu_device *adev,
 
 	if (found_hung_queue) {
 		/* Resume scheduling after hang recovery */
-		r = amdgpu_mes_resume(adev);
+		r = amdgpu_mes_resume(adev, input.xcc_id);
 	}
 
 	return r;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 18bc5ba25f8f..ec8d7f4be840 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -367,7 +367,7 @@ static int suspend_all_queues_mes(struct device_queue_manager *dqm)
 	if (!down_read_trylock(&adev->reset_domain->sem))
 		return -EIO;
 
-	r = amdgpu_mes_suspend(adev);
+	r = amdgpu_mes_suspend(adev, ffs(dqm->dev->xcc_mask) - 1);
 	up_read(&adev->reset_domain->sem);
 
 	if (r) {
@@ -387,7 +387,7 @@ static int resume_all_queues_mes(struct device_queue_manager *dqm)
 	if (!down_read_trylock(&adev->reset_domain->sem))
 		return -EIO;
 
-	r = amdgpu_mes_resume(adev);
+	r = amdgpu_mes_resume(adev, ffs(dqm->dev->xcc_mask) - 1);
 	up_read(&adev->reset_domain->sem);
 
 	if (r) {
-- 
2.43.0


  parent reply	other threads:[~2026-03-24 17:57 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-24 17:56 [PATCH 0/8] Support compute queue/pipe reset on gfx 12.1 Amber Lin
2026-03-24 17:56 ` [PATCH v2 01/10] drm/amdgpu: Fix gfx_hqd_mask in mes 12.1 Amber Lin
2026-03-24 17:56 ` [PATCH v2 02/10] drm/amdgpu: Fixup boost mes detect hang array size Amber Lin
2026-03-26 18:03   ` Alex Deucher
2026-03-24 17:56 ` [PATCH v2 03/10] drm/amdgpu: Fixup detect and reset Amber Lin
2026-03-24 17:56 ` [PATCH v2 04/10] drm/amdgpu: Create hqd info structure Amber Lin
2026-03-26 17:56   ` Alex Deucher
2026-03-26 20:34     ` Amber Lin
2026-03-24 17:56 ` [PATCH v2 05/10] drm/amdgpu: Update mes 12.1's suspend/resume Amber Lin
2026-03-26 17:57   ` Alex Deucher
2026-03-24 17:56 ` Amber Lin [this message]
2026-03-26 18:02   ` [PATCH v2 06/10] drm/amdgpu: Missing multi-XCC support in MES Alex Deucher
2026-03-24 17:56 ` [PATCH v2 07/10] drm/amdgpu: Enable suspend/resume gang in mes 12.1 Amber Lin
2026-03-26 18:03   ` Alex Deucher
2026-03-24 17:56 ` [PATCH v2 08/10] drm/amdkfd: Add detect+reset hangs to GC 12.1 Amber Lin
2026-03-24 17:56 ` [PATCH v2 09/10] drm/amdkfd: Reset queue/pipe in MES Amber Lin
2026-03-26 16:06   ` Liu, Shaoyun
2026-03-26 17:31     ` Amber Lin
2026-03-26 18:19       ` Liu, Shaoyun
2026-03-26 18:51   ` Alex Deucher
2026-03-26 19:40     ` Amber Lin
2026-03-26 21:08       ` Alex Deucher
2026-03-26 21:35         ` Amber Lin
2026-04-13 18:50     ` Amber Lin
2026-03-24 17:56 ` [PATCH v2 10/10] drm/amdkfd: Queue reset support in KFD topology Amber Lin
2026-03-26 18:27   ` Alex Deucher

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260324175653.1325754-7-Amber.Lin@amd.com \
    --to=amber.lin@amd.com \
    --cc=Jesse.Zhang@amd.com \
    --cc=Michael.Chen@amd.com \
    --cc=Shaoyun.Liu@amd.com \
    --cc=alexdeucher@gmail.com \
    --cc=amd-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.