From: Sasha Levin <sashal@kernel.org>
To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
Cc: Philip Yang <Philip.Yang@amd.com>,
Felix Kuehling <felix.kuehling@amd.com>,
Alex Deucher <alexander.deucher@amd.com>,
Sasha Levin <sashal@kernel.org>,
Felix.Kuehling@amd.com, christian.koenig@amd.com,
Xinhui.Pan@amd.com, airlied@gmail.com, simona@ffwll.ch,
amd-gfx@lists.freedesktop.org, dri-devel@lists.freedesktop.org
Subject: [PATCH AUTOSEL 6.12 22/31] drm/amdkfd: Queue interrupt work to different CPU
Date: Sun, 26 Jan 2025 09:54:38 -0500 [thread overview]
Message-ID: <20250126145448.930220-22-sashal@kernel.org> (raw)
In-Reply-To: <20250126145448.930220-1-sashal@kernel.org>
From: Philip Yang <Philip.Yang@amd.com>
[ Upstream commit 34db5a32617d102e8042151bb87590e43c97132e ]
For CPX mode, each KFD node has interrupt worker to process ih_fifo to
send events to user space. Currently all interrupt workers of same adev
queue to same CPU, all workers execution are actually serialized and
this cause KFD ih_fifo overflow when CPU usage is high.
Use per-GPU unbounded highpri queue with number of workers equals to
number of partitions, let queue_work select the next CPU round robin
among the local CPUs of same NUMA.
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
drivers/gpu/drm/amd/amdkfd/kfd_device.c | 25 ++++++++--------------
drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c | 25 ++++++++--------------
drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 3 ++-
3 files changed, 20 insertions(+), 33 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index b05be24531e18..d350c7ce35b3d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -637,6 +637,14 @@ static void kfd_cleanup_nodes(struct kfd_dev *kfd, unsigned int num_nodes)
struct kfd_node *knode;
unsigned int i;
+ /*
+ * flush_work ensures that there are no outstanding
+ * work-queue items that will access interrupt_ring. New work items
+ * can't be created because we stopped interrupt handling above.
+ */
+ flush_workqueue(kfd->ih_wq);
+ destroy_workqueue(kfd->ih_wq);
+
for (i = 0; i < num_nodes; i++) {
knode = kfd->nodes[i];
device_queue_manager_uninit(knode->dqm);
@@ -1058,21 +1066,6 @@ static int kfd_resume(struct kfd_node *node)
return err;
}
-static inline void kfd_queue_work(struct workqueue_struct *wq,
- struct work_struct *work)
-{
- int cpu, new_cpu;
-
- cpu = new_cpu = smp_processor_id();
- do {
- new_cpu = cpumask_next(new_cpu, cpu_online_mask) % nr_cpu_ids;
- if (cpu_to_node(new_cpu) == numa_node_id())
- break;
- } while (cpu != new_cpu);
-
- queue_work_on(new_cpu, wq, work);
-}
-
/* This is called directly from KGD at ISR. */
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
{
@@ -1098,7 +1091,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
patched_ihre, &is_patched)
&& enqueue_ih_ring_entry(node,
is_patched ? patched_ihre : ih_ring_entry)) {
- kfd_queue_work(node->ih_wq, &node->interrupt_work);
+ queue_work(node->kfd->ih_wq, &node->interrupt_work);
spin_unlock_irqrestore(&node->interrupt_lock, flags);
return;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
index 9b6b6e8825934..15b4b70cf1997 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
@@ -62,11 +62,14 @@ int kfd_interrupt_init(struct kfd_node *node)
return r;
}
- node->ih_wq = alloc_workqueue("KFD IH", WQ_HIGHPRI, 1);
- if (unlikely(!node->ih_wq)) {
- kfifo_free(&node->ih_fifo);
- dev_err(node->adev->dev, "Failed to allocate KFD IH workqueue\n");
- return -ENOMEM;
+ if (!node->kfd->ih_wq) {
+ node->kfd->ih_wq = alloc_workqueue("KFD IH", WQ_HIGHPRI | WQ_UNBOUND,
+ node->kfd->num_nodes);
+ if (unlikely(!node->kfd->ih_wq)) {
+ kfifo_free(&node->ih_fifo);
+ dev_err(node->adev->dev, "Failed to allocate KFD IH workqueue\n");
+ return -ENOMEM;
+ }
}
spin_lock_init(&node->interrupt_lock);
@@ -96,16 +99,6 @@ void kfd_interrupt_exit(struct kfd_node *node)
spin_lock_irqsave(&node->interrupt_lock, flags);
node->interrupts_active = false;
spin_unlock_irqrestore(&node->interrupt_lock, flags);
-
- /*
- * flush_work ensures that there are no outstanding
- * work-queue items that will access interrupt_ring. New work items
- * can't be created because we stopped interrupt handling above.
- */
- flush_workqueue(node->ih_wq);
-
- destroy_workqueue(node->ih_wq);
-
kfifo_free(&node->ih_fifo);
}
@@ -162,7 +155,7 @@ static void interrupt_wq(struct work_struct *work)
/* If we spent more than a second processing signals,
* reschedule the worker to avoid soft-lockup warnings
*/
- queue_work(dev->ih_wq, &dev->interrupt_work);
+ queue_work(dev->kfd->ih_wq, &dev->interrupt_work);
break;
}
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 26e48fdc87289..75523f30cd38b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -273,7 +273,6 @@ struct kfd_node {
/* Interrupts */
struct kfifo ih_fifo;
- struct workqueue_struct *ih_wq;
struct work_struct interrupt_work;
spinlock_t interrupt_lock;
@@ -366,6 +365,8 @@ struct kfd_dev {
struct kfd_node *nodes[MAX_KFD_NODES];
unsigned int num_nodes;
+ struct workqueue_struct *ih_wq;
+
/* Kernel doorbells for KFD device */
struct amdgpu_bo *doorbells;
--
2.39.5
next prev parent reply other threads:[~2025-01-26 14:55 UTC|newest]
Thread overview: 32+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-01-26 14:54 [PATCH AUTOSEL 6.12 01/31] drm/virtio: New fence for every plane update Sasha Levin
2025-01-26 14:54 ` [PATCH AUTOSEL 6.12 02/31] drm: Add panel backlight quirks Sasha Levin
2025-01-26 16:28 ` Thomas Weißschuh
2025-01-26 14:54 ` [PATCH AUTOSEL 6.12 03/31] drm: panel-backlight-quirks: Add Framework 13 matte panel Sasha Levin
2025-01-26 14:54 ` [PATCH AUTOSEL 6.12 04/31] drm: panel-backlight-quirks: Add Framework 13 glossy and 2.8k panels Sasha Levin
2025-01-26 14:54 ` [PATCH AUTOSEL 6.12 05/31] nvkm/gsp: correctly advance the read pointer of GSP message queue Sasha Levin
2025-01-26 14:54 ` [PATCH AUTOSEL 6.12 06/31] nvkm: correctly calculate the available space of the GSP cmdq buffer Sasha Levin
2025-01-26 14:54 ` [PATCH AUTOSEL 6.12 07/31] drm/tests: hdmi: handle empty modes in find_preferred_mode() Sasha Levin
2025-01-26 14:54 ` [PATCH AUTOSEL 6.12 08/31] drm/tests: hdmi: return meaningful value from set_connector_edid() Sasha Levin
2025-01-26 14:54 ` [PATCH AUTOSEL 6.12 09/31] drm/amd/display: Populate chroma prefetch parameters, DET buffer fix Sasha Levin
2025-01-26 14:54 ` [PATCH AUTOSEL 6.12 10/31] drm/amd/display: Overwriting dualDPP UBF values before usage Sasha Levin
2025-01-26 14:54 ` [PATCH AUTOSEL 6.12 11/31] printk: Fix signed integer overflow when defining LOG_BUF_LEN_MAX Sasha Levin
2025-01-26 14:54 ` [PATCH AUTOSEL 6.12 12/31] drm/connector: add mutex to protect ELD from concurrent access Sasha Levin
2025-01-26 14:54 ` [PATCH AUTOSEL 6.12 13/31] drm/bridge: anx7625: use eld_mutex to protect access to connector->eld Sasha Levin
2025-01-26 14:54 ` [PATCH AUTOSEL 6.12 14/31] drm/bridge: ite-it66121: " Sasha Levin
2025-01-26 14:54 ` [PATCH AUTOSEL 6.12 15/31] drm/amd/display: " Sasha Levin
2025-01-26 14:54 ` [PATCH AUTOSEL 6.12 16/31] drm/exynos: hdmi: " Sasha Levin
2025-01-26 14:54 ` [PATCH AUTOSEL 6.12 17/31] drm/radeon: " Sasha Levin
2025-01-26 14:54 ` [PATCH AUTOSEL 6.12 18/31] drm/sti: hdmi: " Sasha Levin
2025-01-26 14:54 ` [PATCH AUTOSEL 6.12 19/31] drm/vc4: " Sasha Levin
2025-01-26 14:54 ` [PATCH AUTOSEL 6.12 20/31] drm/amd/display: Fix Mode Cutoff in DSC Passthrough to DP2.1 Monitor Sasha Levin
2025-01-26 14:54 ` [PATCH AUTOSEL 6.12 21/31] drm/amdgpu: Don't enable sdma 4.4.5 CTXEMPTY interrupt Sasha Levin
2025-01-26 14:54 ` Sasha Levin [this message]
2025-01-26 14:54 ` [PATCH AUTOSEL 6.12 23/31] drm/bridge: it6505: Change definition MAX_HDCP_DOWN_STREAM_COUNT Sasha Levin
2025-01-26 14:54 ` [PATCH AUTOSEL 6.12 24/31] drm/bridge: it6505: fix HDCP Bstatus check Sasha Levin
2025-01-26 14:54 ` [PATCH AUTOSEL 6.12 25/31] drm/bridge: it6505: fix HDCP encryption when R0 ready Sasha Levin
2025-01-26 14:54 ` [PATCH AUTOSEL 6.12 26/31] drm/bridge: it6505: fix HDCP CTS compare V matching Sasha Levin
2025-01-26 14:54 ` [PATCH AUTOSEL 6.12 27/31] drm/bridge: it6505: fix HDCP CTS KSV list wait timer Sasha Levin
2025-01-26 14:54 ` [PATCH AUTOSEL 6.12 28/31] safesetid: check size of policy writes Sasha Levin
2025-01-26 14:54 ` [PATCH AUTOSEL 6.12 29/31] drm/amd/display: Increase sanitizer frame larger than limit when compile testing with clang Sasha Levin
2025-01-26 14:54 ` [PATCH AUTOSEL 6.12 30/31] drm/amd/display: Limit Scaling Ratio on DCN3.01 Sasha Levin
2025-01-26 14:54 ` [PATCH AUTOSEL 6.12 31/31] ring-buffer: Make reading page consistent with the code logic Sasha Levin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250126145448.930220-22-sashal@kernel.org \
--to=sashal@kernel.org \
--cc=Philip.Yang@amd.com \
--cc=Xinhui.Pan@amd.com \
--cc=airlied@gmail.com \
--cc=alexander.deucher@amd.com \
--cc=amd-gfx@lists.freedesktop.org \
--cc=christian.koenig@amd.com \
--cc=dri-devel@lists.freedesktop.org \
--cc=felix.kuehling@amd.com \
--cc=linux-kernel@vger.kernel.org \
--cc=simona@ffwll.ch \
--cc=stable@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.