From: Hao Xiang <hao.xiang@bytedance.com>
To: quintela@redhat.com, peterx@redhat.com,
	marcandre.lureau@redhat.com, bryan.zhang@bytedance.com,
	qemu-devel@nongnu.org
Cc: Hao Xiang <hao.xiang@bytedance.com>
Subject: [PATCH 10/16] migration/multifd: Enable DSA offloading in multifd sender path.
Date: Wed, 25 Oct 2023 19:38:16 +0000	[thread overview]
Message-ID: <20231025193822.2813204-11-hao.xiang@bytedance.com> (raw)
In-Reply-To: <20231025193822.2813204-1-hao.xiang@bytedance.com>

The multifd sender path gets an array of pages queued by the migration
thread and performs zero page checking on every page in the array,
classifying each one as either a zero page or a normal page. This
change uses Intel DSA to offload that zero page checking from the CPU
to the DSA accelerator: the sender thread submits a batch of pages to
the DSA hardware and waits for the DSA completion thread to signal
that the work is done.

Signed-off-by: Hao Xiang <hao.xiang@bytedance.com>
---
 migration/multifd.c | 101 +++++++++++++++++++++++++++++++++++++-------
 migration/multifd.h |   3 ++
 2 files changed, 89 insertions(+), 15 deletions(-)

diff --git a/migration/multifd.c b/migration/multifd.c
index 452fb158b8..79fecbd3ae 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -13,6 +13,8 @@
 #include "qemu/osdep.h"
 #include "qemu/rcu.h"
 #include "qemu/cutils.h"
+#include "qemu/dsa.h"
+#include "qemu/memalign.h"
 #include "exec/target_page.h"
 #include "sysemu/sysemu.h"
 #include "exec/ramblock.h"
@@ -555,6 +557,8 @@ void multifd_save_cleanup(void)
             qemu_thread_join(&p->thread);
         }
     }
+    dsa_stop();
+    dsa_cleanup();
     for (i = 0; i < migrate_multifd_channels(); i++) {
         MultiFDSendParams *p = &multifd_send_state->params[i];
         Error *local_err = NULL;
@@ -571,6 +575,11 @@ void multifd_save_cleanup(void)
         p->name = NULL;
         multifd_pages_clear(p->pages);
         p->pages = NULL;
+        g_free(p->addr);
+        p->addr = NULL;
+        buffer_zero_batch_task_destroy(p->dsa_batch_task);
+        qemu_vfree(p->dsa_batch_task);
+        p->dsa_batch_task = NULL;
         p->packet_len = 0;
         g_free(p->packet);
         p->packet = NULL;
@@ -675,13 +684,71 @@ int multifd_send_sync_main(QEMUFile *f)
     return 0;
 }
 
+static void set_page(MultiFDSendParams *p, bool zero_page, uint64_t offset)
+{
+    RAMBlock *rb = p->pages->block;
+
+    if (zero_page) {
+        p->zero[p->zero_num] = offset;
+        p->zero_num++;
+        ram_release_page(rb->idstr, offset);
+    } else {
+        p->normal[p->normal_num] = offset;
+        p->normal_num++;
+    }
+}
+
+static void buffer_is_zero_use_cpu(MultiFDSendParams *p)
+{
+    const void **buf = (const void **)p->addr;
+    /*
+     * No asserts here: this is the fallback path both when DSA is not
+     * running and when the main migration thread does the zero page
+     * detection, so either condition may hold.
+     */
+
+    for (int i = 0; i < p->pages->num; i++) {
+        p->dsa_batch_task->results[i] = buffer_is_zero(buf[i], p->page_size);
+    }
+}
+
+static void buffer_is_zero_use_dsa(MultiFDSendParams *p)
+{
+    assert(!migrate_use_main_zero_page());
+    assert(dsa_is_running());
+
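+    /*
+     * Despite the _async suffix, this call does not return until the
+     * DSA completion thread has signalled completion and filled in
+     * p->dsa_batch_task->results[].
+     */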
+    buffer_is_zero_dsa_batch_async(p->dsa_batch_task,
+                                   (const void **)p->addr,
+                                   p->pages->num,
+                                   p->page_size);
+}
+
+static void multifd_zero_page_check(MultiFDSendParams *p)
+{
+    /* Older QEMUs don't understand zero pages on a multifd channel. */
+    bool use_multifd_zero_page = !migrate_use_main_zero_page();
+    bool use_multifd_dsa_accel = dsa_is_running();
+
+    RAMBlock *rb = p->pages->block;
+
+    for (int i = 0; i < p->pages->num; i++) {
+        p->addr[i] = (ram_addr_t)(rb->host + p->pages->offset[i]);
+    }
+
+    if (use_multifd_zero_page && use_multifd_dsa_accel) {
+        buffer_is_zero_use_dsa(p);
+    } else {
+        buffer_is_zero_use_cpu(p);
+    }
+
+    for (int i = 0; i < p->pages->num; i++) {
+        uint64_t offset = p->pages->offset[i];
+        /*
+         * When the main thread does zero page detection, multifd must
+         * not send zero pages, matching the behaviour of the code this
+         * patch replaces.
+         */
+        bool zero_page = use_multifd_zero_page &&
+                         p->dsa_batch_task->results[i];
+        set_page(p, zero_page, offset);
+    }
+}
+
 static void *multifd_send_thread(void *opaque)
 {
     MultiFDSendParams *p = opaque;
     MigrationThread *thread = NULL;
     Error *local_err = NULL;
-    /* older qemu don't understand zero page on multifd channel */
-    bool use_multifd_zero_page = !migrate_use_main_zero_page();
     int ret = 0;
     bool use_zero_copy_send = migrate_zero_copy_send();
 
@@ -707,7 +774,6 @@ static void *multifd_send_thread(void *opaque)
         qemu_mutex_lock(&p->mutex);
 
         if (p->pending_job) {
-            RAMBlock *rb = p->pages->block;
             uint64_t packet_num = p->packet_num;
             p->flags = 0;
             if (p->sync_needed) {
@@ -725,18 +791,7 @@ static void *multifd_send_thread(void *opaque)
                 p->iovs_num = 1;
             }
 
-            for (int i = 0; i < p->pages->num; i++) {
-                uint64_t offset = p->pages->offset[i];
-                if (use_multifd_zero_page &&
-                    buffer_is_zero(rb->host + offset, p->page_size)) {
-                    p->zero[p->zero_num] = offset;
-                    p->zero_num++;
-                    ram_release_page(rb->idstr, offset);
-                } else {
-                    p->normal[p->normal_num] = offset;
-                    p->normal_num++;
-                }
-            }
+            multifd_zero_page_check(p);
 
             if (p->normal_num) {
                 ret = multifd_send_state->ops->send_prepare(p, &local_err);
@@ -958,11 +1013,15 @@ int multifd_save_setup(Error **errp)
     int thread_count;
     uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size();
     uint8_t i;
+    const char *dsa_parameter = migrate_multifd_dsa_accel();
 
     if (!migrate_multifd()) {
         return 0;
     }
 
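+    /* Bring DSA up before the sender threads are created. */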
+    dsa_init(dsa_parameter);
+    dsa_start();
+
     thread_count = migrate_multifd_channels();
     multifd_send_state = g_malloc0(sizeof(*multifd_send_state));
     multifd_send_state->params = g_new0(MultiFDSendParams, thread_count);
@@ -981,6 +1040,10 @@ int multifd_save_setup(Error **errp)
         p->pending_job = 0;
         p->id = i;
         p->pages = multifd_pages_init(page_count);
+        p->addr = g_new0(ram_addr_t, page_count);
+        p->dsa_batch_task = qemu_memalign(64, sizeof(*p->dsa_batch_task));
+        buffer_zero_batch_task_init(p->dsa_batch_task, page_count);
         p->packet_len = sizeof(MultiFDPacket_t)
                       + sizeof(uint64_t) * page_count;
         p->packet = g_malloc0(p->packet_len);
@@ -1014,6 +1077,7 @@ int multifd_save_setup(Error **errp)
             return ret;
         }
     }
+
     return 0;
 }
 
@@ -1091,6 +1155,8 @@ void multifd_load_cleanup(void)
 
         qemu_thread_join(&p->thread);
     }
+    dsa_stop();
+    dsa_cleanup();
     for (i = 0; i < migrate_multifd_channels(); i++) {
         MultiFDRecvParams *p = &multifd_recv_state->params[i];
 
@@ -1225,6 +1291,7 @@ int multifd_load_setup(Error **errp)
     int thread_count;
     uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size();
     uint8_t i;
+    const char *dsa_parameter = migrate_multifd_dsa_accel();
 
     /*
      * Return successfully if multiFD recv state is already initialised
@@ -1234,6 +1301,9 @@ int multifd_load_setup(Error **errp)
         return 0;
     }
 
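+    /* As on the send side, bring DSA up before creating recv threads. */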
+    dsa_init(dsa_parameter);
+    dsa_start();
+
     thread_count = migrate_multifd_channels();
     multifd_recv_state = g_malloc0(sizeof(*multifd_recv_state));
     multifd_recv_state->params = g_new0(MultiFDRecvParams, thread_count);
@@ -1270,6 +1340,7 @@ int multifd_load_setup(Error **errp)
             return ret;
         }
     }
+
     return 0;
 }
 
diff --git a/migration/multifd.h b/migration/multifd.h
index e8f90776bb..297b055e2b 100644
--- a/migration/multifd.h
+++ b/migration/multifd.h
@@ -114,6 +114,9 @@ typedef struct {
      * pending_job != 0 -> multifd_channel can use it.
      */
     MultiFDPages_t *pages;
+    /* Host address of each page in pages */
+    ram_addr_t *addr;
+    struct buffer_zero_batch_task *dsa_batch_task;
 
     /* thread local variables. No locking required */
 
-- 
2.30.2



Thread overview: 30+ messages
2023-10-25 19:38 [PATCH 00/16] Use Intel DSA accelerator to offload zero page checking in multifd live migration Hao Xiang
2023-10-25 19:38 ` [PATCH 01/16] Cherry pick a set of patches that enables multifd zero page feature Hao Xiang
2023-10-27 12:30   ` Fabiano Rosas
2023-10-27 13:21     ` Peter Maydell
2023-10-28  1:13       ` [External] " Hao Xiang
2023-10-28  1:06     ` Hao Xiang
2023-10-30 13:58       ` Fabiano Rosas
2023-11-06 18:53         ` Hao Xiang
2023-10-25 19:38 ` [PATCH 02/16] meson: Introduce new instruction set enqcmd to the build system Hao Xiang
2023-10-25 19:38 ` [PATCH 03/16] util/dsa: Add dependency idxd Hao Xiang
2023-10-25 19:38 ` [PATCH 04/16] util/dsa: Implement DSA device start and stop logic Hao Xiang
2023-10-25 19:38 ` [PATCH 05/16] util/dsa: Implement DSA task enqueue and dequeue Hao Xiang
2023-10-25 19:38 ` [PATCH 06/16] util/dsa: Implement DSA task asynchronous completion thread model Hao Xiang
2023-10-25 19:38 ` [PATCH 07/16] util/dsa: Implement zero page checking in DSA task Hao Xiang
2023-10-25 19:38 ` [PATCH 08/16] util/dsa: Implement DSA task asynchronous submission and wait for completion Hao Xiang
2023-10-25 19:38 ` [PATCH 09/16] migration/multifd: Add new migration option for multifd DSA offloading Hao Xiang
2023-10-30 14:41   ` Fabiano Rosas
2023-11-06 21:58     ` [External] " Hao Xiang
2023-10-25 19:38 ` Hao Xiang [this message]
2023-10-30 14:37   ` [PATCH 10/16] migration/multifd: Enable DSA offloading in multifd sender path Fabiano Rosas
2023-10-25 19:38 ` [PATCH 11/16] migration/multifd: Add test hook to set normal page ratio Hao Xiang
2023-10-25 19:38 ` [PATCH 12/16] migration/multifd: Enable set normal page ratio test hook in multifd Hao Xiang
2023-10-25 19:38 ` [PATCH 13/16] migration/multifd: Add migration option set packet size Hao Xiang
2023-10-30 15:03   ` Fabiano Rosas
2023-10-25 19:38 ` [PATCH 14/16] migration/multifd: Enable set packet size migration option Hao Xiang
2023-10-25 19:38 ` [PATCH 15/16] util/dsa: Add unit test coverage for Intel DSA task submission and completion Hao Xiang
2023-10-25 19:38 ` [PATCH 16/16] migration/multifd: Add integration tests for multifd with Intel DSA offloading Hao Xiang
2023-10-30 15:26   ` Fabiano Rosas
2023-10-30 15:26 ` [PATCH 00/16] Use Intel DSA accelerator to offload zero page checking in multifd live migration Fabiano Rosas
2023-10-31  1:02   ` [External] " Hao Xiang
