From: yong.huang@smartx.com
To: qemu-devel@nongnu.org
Cc: Peter Xu <peterx@redhat.com>, Fabiano Rosas <farosas@suse.de>,
Eric Blake <eblake@redhat.com>,
Markus Armbruster <armbru@redhat.com>,
Paolo Bonzini <pbonzini@redhat.com>,
yong.huang@smartx.com
Subject: [PATCH v2 1/3] migration: Support background ramblock dirty sync
Date: Mon, 30 Sep 2024 01:14:26 +0800 [thread overview]
Message-ID: <f36590f60307ce9647d3506e55dcbc2405c98ee2.1727630000.git.yong.huang@smartx.com> (raw)
In-Reply-To: <cover.1727630000.git.yong.huang@smartx.com>
From: Hyman Huang <yong.huang@smartx.com>
When VM is configured with huge memory, the current throttle logic
doesn't look like to scale, because migration_trigger_throttle()
is only called for each iteration, so it won't be invoked for a long
time if one iteration can take a long time.
The background dirty sync aim to fix the above issue by synchronizing
the ramblock from remote dirty bitmap and, when necessary, triggering
the CPU throttle multiple times during a long iteration.
This is a trade-off between synchronization overhead and CPU throttle
impact.
Signed-off-by: Hyman Huang <yong.huang@smartx.com>
---
include/migration/misc.h | 3 ++
migration/migration.c | 11 +++++++
migration/ram.c | 64 ++++++++++++++++++++++++++++++++++++
migration/ram.h | 3 ++
migration/trace-events | 1 +
system/cpu-timers.c | 2 ++
tests/qtest/migration-test.c | 29 ++++++++++++++++
7 files changed, 113 insertions(+)
diff --git a/include/migration/misc.h b/include/migration/misc.h
index bfadc5613b..67c00d98f5 100644
--- a/include/migration/misc.h
+++ b/include/migration/misc.h
@@ -111,4 +111,7 @@ bool migration_in_bg_snapshot(void);
/* migration/block-dirty-bitmap.c */
void dirty_bitmap_mig_init(void);
+/* migration/ram.c */
+void bg_ram_dirty_sync_init(void);
+
#endif
diff --git a/migration/migration.c b/migration/migration.c
index 3dea06d577..224b5dfb4f 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -3285,6 +3285,9 @@ static void migration_iteration_finish(MigrationState *s)
{
/* If we enabled cpu throttling for auto-converge, turn it off. */
cpu_throttle_stop();
+ if (migrate_auto_converge()) {
+ bg_ram_dirty_sync_timer_enable(false);
+ }
bql_lock();
switch (s->state) {
@@ -3526,6 +3529,14 @@ static void *migration_thread(void *opaque)
trace_migration_thread_setup_complete();
+ /*
+ * Tick the background ramblock dirty sync timer after setup
+ * phase.
+ */
+ if (migrate_auto_converge()) {
+ bg_ram_dirty_sync_timer_enable(true);
+ }
+
while (migration_is_active()) {
if (urgent || !migration_rate_exceeded(s->to_dst_file)) {
MigIterateState iter_state = migration_iteration_run(s);
diff --git a/migration/ram.c b/migration/ram.c
index 67ca3d5d51..995bae1ac9 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -110,6 +110,12 @@
*/
#define MAPPED_RAM_LOAD_BUF_SIZE 0x100000
+/* Background ramblock dirty sync trigger every five seconds */
+#define BG_RAM_SYNC_TIMESLICE_MS 5000
+#define BG_RAM_SYNC_TIMER_INTERVAL_MS 1000
+
+static QEMUTimer *bg_ram_dirty_sync_timer;
+
XBZRLECacheStats xbzrle_counters;
/* used by the search for pages to send */
@@ -4543,6 +4549,64 @@ static void ram_mig_ram_block_resized(RAMBlockNotifier *n, void *host,
}
}
+static void bg_ram_dirty_sync_timer_tick(void *opaque)
+{
+ static int prev_pct;
+ static uint64_t prev_sync_cnt = 2;
+ uint64_t sync_cnt = stat64_get(&mig_stats.dirty_sync_count);
+ int cur_pct = cpu_throttle_get_percentage();
+
+ if (prev_pct && !cur_pct) {
+ /* CPU throttle timer stopped, so do we */
+ return;
+ }
+
+ /*
+ * The first iteration copies all memory anyhow and has no
+ * effect on guest performance, therefore omit it to avoid
+ * paying extra for the sync penalty.
+ */
+ if (sync_cnt <= 1) {
+ goto end;
+ }
+
+ if (sync_cnt == prev_sync_cnt) {
+ int64_t curr_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+ assert(ram_state);
+ if ((curr_time - ram_state->time_last_bitmap_sync) >
+ BG_RAM_SYNC_TIMESLICE_MS) {
+ trace_bg_ram_dirty_sync();
+ WITH_RCU_READ_LOCK_GUARD() {
+ migration_bitmap_sync_precopy(ram_state, false);
+ }
+ }
+ }
+
+end:
+ prev_sync_cnt = stat64_get(&mig_stats.dirty_sync_count);
+ prev_pct = cpu_throttle_get_percentage();
+
+ timer_mod(bg_ram_dirty_sync_timer,
+ qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) +
+ BG_RAM_SYNC_TIMER_INTERVAL_MS);
+}
+
+void bg_ram_dirty_sync_timer_enable(bool enable)
+{
+ if (enable) {
+ bg_ram_dirty_sync_timer_tick(NULL);
+ } else {
+ timer_del(bg_ram_dirty_sync_timer);
+ }
+}
+
+void bg_ram_dirty_sync_init(void)
+{
+ bg_ram_dirty_sync_timer =
+ timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
+ bg_ram_dirty_sync_timer_tick, NULL);
+}
+
static RAMBlockNotifier ram_mig_ram_notifier = {
.ram_block_resized = ram_mig_ram_block_resized,
};
diff --git a/migration/ram.h b/migration/ram.h
index bc0318b834..9c1a2f30f1 100644
--- a/migration/ram.h
+++ b/migration/ram.h
@@ -93,4 +93,7 @@ void ram_write_tracking_prepare(void);
int ram_write_tracking_start(void);
void ram_write_tracking_stop(void);
+/* Background ramblock dirty sync */
+void bg_ram_dirty_sync_timer_enable(bool enable);
+
#endif
diff --git a/migration/trace-events b/migration/trace-events
index c65902f042..3f09e7f383 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -90,6 +90,7 @@ put_qlist_end(const char *field_name, const char *vmsd_name) "%s(%s)"
qemu_file_fclose(void) ""
# ram.c
+bg_ram_dirty_sync(void) ""
get_queued_page(const char *block_name, uint64_t tmp_offset, unsigned long page_abs) "%s/0x%" PRIx64 " page_abs=0x%lx"
get_queued_page_not_dirty(const char *block_name, uint64_t tmp_offset, unsigned long page_abs) "%s/0x%" PRIx64 " page_abs=0x%lx"
migration_bitmap_sync_start(void) ""
diff --git a/system/cpu-timers.c b/system/cpu-timers.c
index 0b31c9a1b6..64f0834be4 100644
--- a/system/cpu-timers.c
+++ b/system/cpu-timers.c
@@ -25,6 +25,7 @@
#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "migration/vmstate.h"
+#include "migration/misc.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "sysemu/cpus.h"
@@ -274,4 +275,5 @@ void cpu_timers_init(void)
vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
cpu_throttle_init();
+ bg_ram_dirty_sync_init();
}
diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index d6768d5d71..3296f5244d 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -468,6 +468,12 @@ static void migrate_ensure_converge(QTestState *who)
migrate_set_parameter_int(who, "downtime-limit", 30 * 1000);
}
+static void migrate_ensure_iteration_last_long(QTestState *who)
+{
+ /* Set 10Byte/s bandwidth limit to make the iteration last long enough */
+ migrate_set_parameter_int(who, "max-bandwidth", 10);
+}
+
/*
* Our goal is to ensure that we run a single full migration
* iteration, and also dirty memory, ensuring that at least
@@ -2791,6 +2797,7 @@ static void test_migrate_auto_converge(void)
* so we need to decrease a bandwidth.
*/
const int64_t init_pct = 5, inc_pct = 25, max_pct = 95;
+ uint64_t prev_dirty_sync_cnt, dirty_sync_cnt;
if (test_migrate_start(&from, &to, uri, &args)) {
return;
@@ -2827,6 +2834,28 @@ static void test_migrate_auto_converge(void)
} while (true);
/* The first percentage of throttling should be at least init_pct */
g_assert_cmpint(percentage, >=, init_pct);
+
+ /* Make sure the iteration last a long time enough */
+ migrate_ensure_iteration_last_long(from);
+
+ /*
+ * End the loop when the dirty sync count greater than 1.
+ */
+ while ((dirty_sync_cnt = get_migration_pass(from)) < 2) {
+ usleep(1000 * 1000);
+ }
+
+ prev_dirty_sync_cnt = dirty_sync_cnt;
+
+ /*
+ * The dirty sync count must changes in 5 seconds, here we
+ * plus 1 second as error value.
+ */
+ sleep(5 + 1);
+
+ dirty_sync_cnt = get_migration_pass(from);
+ g_assert_cmpint(dirty_sync_cnt, != , prev_dirty_sync_cnt);
+
/* Now, when we tested that throttling works, let it converge */
migrate_ensure_converge(from);
--
2.27.0
next prev parent reply other threads:[~2024-09-29 17:16 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-09-29 17:14 [PATCH v2 0/3] migration: auto-converge refinements for huge VM yong.huang
2024-09-29 17:14 ` yong.huang [this message]
2024-09-30 20:41 ` [PATCH v2 1/3] migration: Support background ramblock dirty sync Peter Xu
2024-10-01 2:02 ` Yong Huang
2024-10-01 15:28 ` Peter Xu
2024-10-08 2:27 ` Yong Huang
2024-09-29 17:14 ` [PATCH v2 2/3] qapi/migration: Introduce cpu-throttle-responsive parameter yong.huang
2024-09-29 17:14 ` [PATCH v2 3/3] migration: Support responsive CPU throttle yong.huang
2024-09-30 20:47 ` Peter Xu
2024-10-01 2:18 ` Yong Huang
2024-10-01 15:37 ` Peter Xu
2024-10-08 2:34 ` Yong Huang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=f36590f60307ce9647d3506e55dcbc2405c98ee2.1727630000.git.yong.huang@smartx.com \
--to=yong.huang@smartx.com \
--cc=armbru@redhat.com \
--cc=eblake@redhat.com \
--cc=farosas@suse.de \
--cc=pbonzini@redhat.com \
--cc=peterx@redhat.com \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).