From: Yichen Wang <yichen.wang@bytedance.com>
To: "Dr. David Alan Gilbert" <dave@treblig.org>,
"Paolo Bonzini" <pbonzini@redhat.com>,
"Marc-André Lureau" <marcandre.lureau@redhat.com>,
"Daniel P. Berrangé" <berrange@redhat.com>,
"Philippe Mathieu-Daudé" <philmd@linaro.org>,
"Peter Xu" <peterx@redhat.com>, "Fabiano Rosas" <farosas@suse.de>,
"Eric Blake" <eblake@redhat.com>,
"Markus Armbruster" <armbru@redhat.com>,
"Michael S. Tsirkin" <mst@redhat.com>,
"Cornelia Huck" <cohuck@redhat.com>,
qemu-devel@nongnu.org
Cc: "Hao Xiang" <hao.xiang@linux.dev>,
"Liu, Yuan1" <yuan1.liu@intel.com>,
"Shivam Kumar" <shivam.kumar1@nutanix.com>,
"Ho-Ren (Jack) Chuang" <horenchuang@bytedance.com>,
"Yichen Wang" <yichen.wang@bytedance.com>
Subject: [PATCH v6 05/12] util/dsa: Implement DSA task asynchronous completion thread model.
Date: Wed, 9 Oct 2024 16:46:03 -0700 [thread overview]
Message-ID: <20241009234610.27039-6-yichen.wang@bytedance.com> (raw)
In-Reply-To: <20241009234610.27039-1-yichen.wang@bytedance.com>
From: Hao Xiang <hao.xiang@linux.dev>
* Create a dedicated thread for DSA task completion.
* DSA completion thread runs a loop and poll for completed tasks.
* Start and stop DSA completion thread during DSA device start stop.
User space application can directly submit task to Intel DSA
accelerator by writing to DSA's device memory (mapped in user space).
Once a task is submitted, the device starts processing it and write
the completion status back to the task. A user space application can
poll the task's completion status to check for completion. This change
uses a dedicated thread to perform DSA task completion checking.
Signed-off-by: Hao Xiang <hao.xiang@linux.dev>
Signed-off-by: Yichen Wang <yichen.wang@bytedance.com>
---
include/qemu/dsa.h | 1 +
util/dsa.c | 274 ++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 274 insertions(+), 1 deletion(-)
diff --git a/include/qemu/dsa.h b/include/qemu/dsa.h
index f39533f4ac..7b30303791 100644
--- a/include/qemu/dsa.h
+++ b/include/qemu/dsa.h
@@ -69,6 +69,7 @@ typedef struct QemuDsaBatchTask {
QemuDsaTaskType task_type;
QemuDsaTaskStatus status;
int batch_size;
+ bool *results;
QSIMPLEQ_ENTRY(QemuDsaBatchTask) entry;
} QemuDsaBatchTask;
diff --git a/util/dsa.c b/util/dsa.c
index 79e305cb6e..56828bec21 100644
--- a/util/dsa.c
+++ b/util/dsa.c
@@ -33,9 +33,20 @@
#define DSA_WQ_PORTAL_SIZE 4096
#define DSA_WQ_DEPTH 128
#define MAX_DSA_DEVICES 16
+#define DSA_COMPLETION_THREAD "qemu_dsa_completion"
+
+typedef struct {
+ bool stopping;
+ bool running;
+ QemuThread thread;
+ int thread_id;
+ QemuSemaphore sem_init_done;
+ QemuDsaDeviceGroup *group;
+} QemuDsaCompletionThread;
uint32_t max_retry_count;
static QemuDsaDeviceGroup dsa_group;
+static QemuDsaCompletionThread completion_thread;
/**
@@ -405,6 +416,265 @@ submit_batch_wi_async(QemuDsaBatchTask *batch_task)
return dsa_task_enqueue(device_group, batch_task);
}
+/**
+ * @brief Poll for the DSA work item completion.
+ *
+ * @param completion A pointer to the DSA work item completion record.
+ * @param opcode The DSA opcode.
+ *
+ * @return Zero if successful, non-zero otherwise.
+ */
+static int
+poll_completion(struct dsa_completion_record *completion,
+ enum dsa_opcode opcode)
+{
+ uint8_t status;
+ uint64_t retry = 0;
+
+ while (true) {
+ /* The DSA operation completes successfully or fails. */
+ status = completion->status;
+ if (status == DSA_COMP_SUCCESS ||
+ status == DSA_COMP_PAGE_FAULT_NOBOF ||
+ status == DSA_COMP_BATCH_PAGE_FAULT ||
+ status == DSA_COMP_BATCH_FAIL) {
+ break;
+ } else if (status != DSA_COMP_NONE) {
+ error_report("DSA opcode %d failed with status = %d.",
+ opcode, status);
+ return 1;
+ }
+ retry++;
+ if (retry > max_retry_count) {
+ error_report("DSA wait for completion retry %lu times.", retry);
+ return 1;
+ }
+ _mm_pause();
+ }
+
+ return 0;
+}
+
+/**
+ * @brief Complete a single DSA task in the batch task.
+ *
+ * @param task A pointer to the batch task structure.
+ *
+ * @return Zero if successful, otherwise non-zero.
+ */
+static int
+poll_task_completion(QemuDsaBatchTask *task)
+{
+ assert(task->task_type == QEMU_DSA_TASK);
+
+ struct dsa_completion_record *completion = &task->completions[0];
+ uint8_t status;
+ int ret;
+
+ ret = poll_completion(completion, task->descriptors[0].opcode);
+ if (ret != 0) {
+ goto exit;
+ }
+
+ status = completion->status;
+ if (status == DSA_COMP_SUCCESS) {
+ task->results[0] = (completion->result == 0);
+ goto exit;
+ }
+
+ assert(status == DSA_COMP_PAGE_FAULT_NOBOF);
+
+exit:
+ return ret;
+}
+
+/**
+ * @brief Poll a batch task status until it completes. If DSA task doesn't
+ * complete properly, use CPU to complete the task.
+ *
+ * @param batch_task A pointer to the DSA batch task.
+ *
+ * @return Zero if successful, otherwise non-zero.
+ */
+static int
+poll_batch_task_completion(QemuDsaBatchTask *batch_task)
+{
+ struct dsa_completion_record *batch_completion =
+ &batch_task->batch_completion;
+ struct dsa_completion_record *completion;
+ uint8_t batch_status;
+ uint8_t status;
+ bool *results = batch_task->results;
+ uint32_t count = batch_task->batch_descriptor.desc_count;
+ int ret;
+
+ ret = poll_completion(batch_completion,
+ batch_task->batch_descriptor.opcode);
+ if (ret != 0) {
+ goto exit;
+ }
+
+ batch_status = batch_completion->status;
+
+ if (batch_status == DSA_COMP_SUCCESS) {
+ if (batch_completion->bytes_completed == count) {
+ /*
+ * Let's skip checking for each descriptors' completion status
+ * if the batch descriptor says all succedded.
+ */
+ for (int i = 0; i < count; i++) {
+ assert(batch_task->completions[i].status == DSA_COMP_SUCCESS);
+ results[i] = (batch_task->completions[i].result == 0);
+ }
+ goto exit;
+ }
+ } else {
+ assert(batch_status == DSA_COMP_BATCH_FAIL ||
+ batch_status == DSA_COMP_BATCH_PAGE_FAULT);
+ }
+
+ for (int i = 0; i < count; i++) {
+
+ completion = &batch_task->completions[i];
+ status = completion->status;
+
+ if (status == DSA_COMP_SUCCESS) {
+ results[i] = (completion->result == 0);
+ continue;
+ }
+
+ assert(status == DSA_COMP_PAGE_FAULT_NOBOF);
+
+ if (status != DSA_COMP_PAGE_FAULT_NOBOF) {
+ error_report("Unexpected DSA completion status = %u.", status);
+ ret = 1;
+ goto exit;
+ }
+ }
+
+exit:
+ return ret;
+}
+
+/**
+ * @brief Handles an asynchronous DSA batch task completion.
+ *
+ * @param task A pointer to the batch buffer zero task structure.
+ */
+static void
+dsa_batch_task_complete(QemuDsaBatchTask *batch_task)
+{
+ batch_task->status = QEMU_DSA_TASK_COMPLETION;
+ batch_task->completion_callback(batch_task);
+}
+
+/**
+ * @brief The function entry point called by a dedicated DSA
+ * work item completion thread.
+ *
+ * @param opaque A pointer to the thread context.
+ *
+ * @return void* Not used.
+ */
+static void *
+dsa_completion_loop(void *opaque)
+{
+ QemuDsaCompletionThread *thread_context =
+ (QemuDsaCompletionThread *)opaque;
+ QemuDsaBatchTask *batch_task;
+ QemuDsaDeviceGroup *group = thread_context->group;
+ int ret;
+
+ rcu_register_thread();
+
+ thread_context->thread_id = qemu_get_thread_id();
+ qemu_sem_post(&thread_context->sem_init_done);
+
+ while (thread_context->running) {
+ batch_task = dsa_task_dequeue(group);
+ assert(batch_task != NULL || !group->running);
+ if (!group->running) {
+ assert(!thread_context->running);
+ break;
+ }
+ if (batch_task->task_type == QEMU_DSA_TASK) {
+ ret = poll_task_completion(batch_task);
+ } else {
+ assert(batch_task->task_type == QEMU_DSA_BATCH_TASK);
+ ret = poll_batch_task_completion(batch_task);
+ }
+
+ if (ret != 0) {
+ goto exit;
+ }
+
+ dsa_batch_task_complete(batch_task);
+ }
+
+exit:
+ if (ret != 0) {
+ error_report("DSA completion thread exited due to internal error.");
+ }
+ rcu_unregister_thread();
+ return NULL;
+}
+
+/**
+ * @brief Initializes a DSA completion thread.
+ *
+ * @param completion_thread A pointer to the completion thread context.
+ * @param group A pointer to the DSA device group.
+ */
+static void
+dsa_completion_thread_init(
+ QemuDsaCompletionThread *completion_thread,
+ QemuDsaDeviceGroup *group)
+{
+ completion_thread->stopping = false;
+ completion_thread->running = true;
+ completion_thread->thread_id = -1;
+ qemu_sem_init(&completion_thread->sem_init_done, 0);
+ completion_thread->group = group;
+
+ qemu_thread_create(&completion_thread->thread,
+ DSA_COMPLETION_THREAD,
+ dsa_completion_loop,
+ completion_thread,
+ QEMU_THREAD_JOINABLE);
+
+ /* Wait for initialization to complete */
+ qemu_sem_wait(&completion_thread->sem_init_done);
+}
+
+/**
+ * @brief Stops the completion thread (and implicitly, the device group).
+ *
+ * @param opaque A pointer to the completion thread.
+ */
+static void dsa_completion_thread_stop(void *opaque)
+{
+ QemuDsaCompletionThread *thread_context =
+ (QemuDsaCompletionThread *)opaque;
+
+ QemuDsaDeviceGroup *group = thread_context->group;
+
+ qemu_mutex_lock(&group->task_queue_lock);
+
+ thread_context->stopping = true;
+ thread_context->running = false;
+
+ /* Prevent the compiler from setting group->running first. */
+ barrier();
+ dsa_device_group_stop(group);
+
+ qemu_cond_signal(&group->task_queue_cond);
+ qemu_mutex_unlock(&group->task_queue_lock);
+
+ qemu_thread_join(&thread_context->thread);
+
+ qemu_sem_destroy(&thread_context->sem_init_done);
+}
+
/**
* @brief Check if DSA is running.
*
@@ -412,7 +682,7 @@ submit_batch_wi_async(QemuDsaBatchTask *batch_task)
*/
bool qemu_dsa_is_running(void)
{
- return false;
+ return completion_thread.running;
}
static void
@@ -453,6 +723,7 @@ void qemu_dsa_start(void)
return;
}
dsa_device_group_start(&dsa_group);
+ dsa_completion_thread_init(&completion_thread, &dsa_group);
}
/**
@@ -467,6 +738,7 @@ void qemu_dsa_stop(void)
return;
}
+ dsa_completion_thread_stop(&completion_thread);
dsa_empty_task_queue(group);
}
--
Yichen Wang
next prev parent reply other threads:[~2024-10-09 23:51 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-10-09 23:45 [PATCH v6 00/12] Use Intel DSA accelerator to offload zero page checking in multifd live migration Yichen Wang
2024-10-09 23:45 ` [PATCH v6 01/12] meson: Introduce new instruction set enqcmd to the build system Yichen Wang
2024-10-09 23:46 ` [PATCH v6 02/12] util/dsa: Add idxd into linux header copy list Yichen Wang
2024-10-09 23:46 ` [PATCH v6 03/12] util/dsa: Implement DSA device start and stop logic Yichen Wang
2024-10-16 18:59 ` Peter Xu
2024-10-16 21:00 ` Fabiano Rosas
2024-10-09 23:46 ` [PATCH v6 04/12] util/dsa: Implement DSA task enqueue and dequeue Yichen Wang
2024-10-09 23:46 ` Yichen Wang [this message]
2024-10-09 23:46 ` [PATCH v6 06/12] util/dsa: Implement zero page checking in DSA task Yichen Wang
2024-10-09 23:46 ` [PATCH v6 07/12] util/dsa: Implement DSA task asynchronous submission and wait for completion Yichen Wang
2024-10-09 23:46 ` [PATCH v6 08/12] migration/multifd: Add new migration option for multifd DSA offloading Yichen Wang
2024-10-11 17:14 ` Dr. David Alan Gilbert
2024-10-15 22:09 ` [External] " Yichen Wang
2024-10-15 22:51 ` Dr. David Alan Gilbert
2024-10-09 23:46 ` [PATCH v6 09/12] migration/multifd: Enable DSA offloading in multifd sender path Yichen Wang
2024-10-17 19:11 ` Fabiano Rosas
2024-10-09 23:46 ` [PATCH v6 10/12] migration/multifd: Add migration option set packet size Yichen Wang
2024-10-17 19:16 ` Fabiano Rosas
2024-10-09 23:46 ` [PATCH v6 11/12] util/dsa: Add unit test coverage for Intel DSA task submission and completion Yichen Wang
2024-10-09 23:46 ` [PATCH v6 12/12] migration/multifd: Add integration tests for multifd with Intel DSA offloading Yichen Wang
2024-10-11 14:13 ` [PATCH v6 00/12] Use Intel DSA accelerator to offload zero page checking in multifd live migration Fabiano Rosas
2024-10-15 22:05 ` [External] " Yichen Wang
2024-10-11 16:32 ` Peter Xu
2024-10-11 16:53 ` Dr. David Alan Gilbert
2024-10-15 22:02 ` [External] " Yichen Wang
2024-10-16 19:44 ` Peter Xu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20241009234610.27039-6-yichen.wang@bytedance.com \
--to=yichen.wang@bytedance.com \
--cc=armbru@redhat.com \
--cc=berrange@redhat.com \
--cc=cohuck@redhat.com \
--cc=dave@treblig.org \
--cc=eblake@redhat.com \
--cc=farosas@suse.de \
--cc=hao.xiang@linux.dev \
--cc=horenchuang@bytedance.com \
--cc=marcandre.lureau@redhat.com \
--cc=mst@redhat.com \
--cc=pbonzini@redhat.com \
--cc=peterx@redhat.com \
--cc=philmd@linaro.org \
--cc=qemu-devel@nongnu.org \
--cc=shivam.kumar1@nutanix.com \
--cc=yuan1.liu@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).