qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Andrey Gruzdev via <qemu-devel@nongnu.org>
To: qemu-devel@nongnu.org
Cc: Den Lunev <den@openvz.org>, Eric Blake <eblake@redhat.com>,
	Paolo Bonzini <pbonzini@redhat.com>,
	Juan Quintela <quintela@redhat.com>,
	"Dr . David Alan Gilbert" <dgilbert@redhat.com>,
	Markus Armbruster <armbru@redhat.com>,
	Peter Xu <peterx@redhat.com>,
	Andrey Gruzdev <andrey.gruzdev@virtuozzo.com>
Subject: [PATCH v3 4/7] implementation of write-tracking migration thread
Date: Thu, 19 Nov 2020 15:59:37 +0300	[thread overview]
Message-ID: <20201119125940.20017-5-andrey.gruzdev@virtuozzo.com> (raw)
In-Reply-To: <20201119125940.20017-1-andrey.gruzdev@virtuozzo.com>

Signed-off-by: Andrey Gruzdev <andrey.gruzdev@virtuozzo.com>
---
 migration/migration.c | 168 +++++++++++++++++++++++++++++++++++++++++-
 migration/migration.h |   3 +
 migration/savevm.c    |   1 -
 migration/savevm.h    |   2 +
 4 files changed, 171 insertions(+), 3 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index ff0364dde0..158e5441ec 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -2013,6 +2013,7 @@ void migrate_init(MigrationState *s)
      * locks.
      */
     s->cleanup_bh = 0;
+    s->wt_vm_start_bh = 0;
     s->to_dst_file = NULL;
     s->state = MIGRATION_STATUS_NONE;
     s->rp_state.from_dst_file = NULL;
@@ -3551,6 +3552,21 @@ static void migration_iteration_finish(MigrationState *s)
     qemu_mutex_unlock_iothread();
 }
 
+static void wt_migration_iteration_finish(MigrationState *s)
+{
+    /* TODO: implement */
+}
+
+/*
+ * Return true if continue to the next iteration directly, false
+ * otherwise.
+ */
+static MigIterateState wt_migration_iteration_run(MigrationState *s)
+{
+    /* TODO: implement */
+    return MIG_ITERATE_RESUME;
+}
+
 void migration_make_urgent_request(void)
 {
     qemu_sem_post(&migrate_get_current()->rate_limit_sem);
@@ -3698,6 +3714,148 @@ static void *migration_thread(void *opaque)
     return NULL;
 }
 
+static void wt_migration_vm_start_bh(void *opaque)
+{
+    /* TODO: implement */
+}
+
+/*
+ * Master migration thread on the source VM.
+ * This is an alternative implementation of live migration
+ * which uses userfault_fd write protection mechanism introduced in
+ * 5.7 kernel. Compared to existing dirty page logging migration
+ * it produces much lesser traffic and smaller snapshot images since
+ * no page duplicates can get into the stream. Another the key point
+ * is that generated vmstate stream reflects machine state 'frozen'
+ * at the beginning of migration compared to dirty page logging
+ * mechanism, which effectively results in that saved snapshot is the
+ * state at the end of migration process.
+ */
+static void *wt_migration_thread(void *opaque)
+{
+    MigrationState *s = opaque;
+    int64_t setup_start;
+    MigThrError thr_error;
+    QEMUFile *fb;
+    bool early_fail = true;
+
+    rcu_register_thread();
+    object_ref(OBJECT(s));
+
+    qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX);
+
+    setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
+    /*
+     * We want to save vmstate for the moment when migration has been
+     * initiated but also we want to save RAM content while VM is running.
+     * The RAM content should appear first in the vmstate. So, we first
+     * stash the non-RAM part of the vmstate to the temporary buffer,
+     * then write RAM part of the vmstate to the migration stream
+     * with vCPUs running and, finally, write stashed non-RAM part of
+     * the vmstate from the buffer to the migration stream.
+     */
+    s->bioc = qio_channel_buffer_new(128 * 1024);
+    qio_channel_set_name(QIO_CHANNEL(s->bioc), "vmstate-buffer");
+    fb = qemu_fopen_channel_output(QIO_CHANNEL(s->bioc));
+    object_unref(OBJECT(s->bioc));
+
+    update_iteration_initial_status(s);
+
+    qemu_savevm_state_header(s->to_dst_file);
+    qemu_savevm_state_setup(s->to_dst_file);
+
+    if (qemu_savevm_state_guest_unplug_pending()) {
+        migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
+                          MIGRATION_STATUS_WAIT_UNPLUG);
+
+        while (s->state == MIGRATION_STATUS_WAIT_UNPLUG &&
+               qemu_savevm_state_guest_unplug_pending()) {
+            qemu_sem_timedwait(&s->wait_unplug_sem, 250);
+        }
+
+        migrate_set_state(&s->state, MIGRATION_STATUS_WAIT_UNPLUG,
+                          MIGRATION_STATUS_ACTIVE);
+    }
+    s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
+
+    migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
+                      MIGRATION_STATUS_ACTIVE);
+    trace_migration_thread_setup_complete();
+    s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+
+    qemu_mutex_lock_iothread();
+
+    if (global_state_store()) {
+        goto fail;
+    }
+    /* Forcibly stop VM before saving state of vCPUs and devices */
+    if (vm_stop_force_state(RUN_STATE_PAUSED)) {
+        goto fail;
+    }
+    /*
+     * Put vCPUs in sync with shadow context structures, then
+     * save their state to channel-buffer along with devices.
+     */
+    cpu_synchronize_all_states();
+    if (qemu_savevm_state_complete_precopy_non_iterable(fb, false, false)) {
+        goto fail;
+    }
+    /* Now initialize UFFD context and start tracking RAM writes */
+    if (ram_write_tracking_start()) {
+        goto fail;
+    }
+    early_fail = false;
+
+    /*
+     * Start VM from BH handler to avoid write-fault lock here.
+     * UFFD-WP protection for the whole RAM is already enabled so
+     * calling VM state change notifiers from vm_start() would initiate
+     * writes to virtio VQs memory which is in write-protected region.
+     */
+    s->wt_vm_start_bh = qemu_bh_new(wt_migration_vm_start_bh, s);
+    qemu_bh_schedule(s->wt_vm_start_bh);
+
+    qemu_mutex_unlock_iothread();
+
+    while (migration_is_active(s)) {
+        MigIterateState iter_state = wt_migration_iteration_run(s);
+        if (iter_state == MIG_ITERATE_SKIP) {
+            continue;
+        } else if (iter_state == MIG_ITERATE_BREAK) {
+            break;
+        }
+
+        /*
+         * Try to detect any kind of failures, and see whether we
+         * should stop the migration now.
+         */
+        thr_error = migration_detect_error(s);
+        if (thr_error == MIG_THR_ERR_FATAL) {
+            /* Stop migration */
+            break;
+        }
+
+        migration_update_counters(s, qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
+    }
+
+    trace_migration_thread_after_loop();
+
+fail:
+    if (early_fail) {
+        migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
+                MIGRATION_STATUS_FAILED);
+        qemu_mutex_unlock_iothread();
+    }
+
+    wt_migration_iteration_finish(s);
+
+    qemu_fclose(fb);
+    object_unref(OBJECT(s));
+    rcu_unregister_thread();
+
+    return NULL;
+}
+
 void migrate_fd_connect(MigrationState *s, Error *error_in)
 {
     Error *local_err = NULL;
@@ -3761,8 +3919,14 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
         migrate_fd_cleanup(s);
         return;
     }
-    qemu_thread_create(&s->thread, "live_migration", migration_thread, s,
-                       QEMU_THREAD_JOINABLE);
+
+    if (migrate_track_writes_ram()) {
+        qemu_thread_create(&s->thread, "wt_live_migration",
+                wt_migration_thread, s, QEMU_THREAD_JOINABLE);
+    } else {
+        qemu_thread_create(&s->thread, "live_migration",
+                migration_thread, s, QEMU_THREAD_JOINABLE);
+    }
     s->migration_thread_running = true;
 }
 
diff --git a/migration/migration.h b/migration/migration.h
index 339ae720e0..c3b4c7f2fd 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -20,6 +20,7 @@
 #include "qemu/thread.h"
 #include "qemu/coroutine_int.h"
 #include "io/channel.h"
+#include "io/channel-buffer.h"
 #include "net/announce.h"
 #include "qom/object.h"
 
@@ -147,8 +148,10 @@ struct MigrationState {
 
     /*< public >*/
     QemuThread thread;
+    QEMUBH *wt_vm_start_bh;
     QEMUBH *cleanup_bh;
     QEMUFile *to_dst_file;
+    QIOChannelBuffer *bioc;
     /*
      * Protects to_dst_file pointer.  We need to make sure we won't
      * yield or hang during the critical section, since this lock will
diff --git a/migration/savevm.c b/migration/savevm.c
index 5f937a2762..62d5f8a869 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1352,7 +1352,6 @@ int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy)
     return 0;
 }
 
-static
 int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
                                                     bool in_postcopy,
                                                     bool inactivate_disks)
diff --git a/migration/savevm.h b/migration/savevm.h
index ba64a7e271..aaee2528ed 100644
--- a/migration/savevm.h
+++ b/migration/savevm.h
@@ -64,5 +64,7 @@ int qemu_loadvm_state(QEMUFile *f);
 void qemu_loadvm_state_cleanup(void);
 int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis);
 int qemu_load_device_state(QEMUFile *f);
+int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
+        bool in_postcopy, bool inactivate_disks);
 
 #endif
-- 
2.25.1



  parent reply	other threads:[~2020-11-19 13:02 UTC|newest]

Thread overview: 44+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-11-19 12:59 [PATCH v3 0/7] UFFD write-tracking migration/snapshots Andrey Gruzdev via
2020-11-19 12:59 ` [PATCH v3 1/7] introduce 'track-writes-ram' migration capability Andrey Gruzdev via
2020-11-19 18:51   ` Peter Xu
2020-11-19 19:07     ` Peter Xu
2020-11-20 11:35       ` Andrey Gruzdev
2020-11-24 16:55       ` Dr. David Alan Gilbert
2020-11-24 17:25         ` Andrey Gruzdev
2020-11-20 11:32     ` Andrey Gruzdev
2020-11-19 12:59 ` [PATCH v3 2/7] introduce UFFD-WP low-level interface helpers Andrey Gruzdev via
2020-11-19 18:39   ` Peter Xu
2020-11-20 11:04     ` Andrey Gruzdev
2020-11-20 15:01       ` Peter Xu
2020-11-20 15:43         ` Andrey Gruzdev
2020-11-24 17:57   ` Dr. David Alan Gilbert
2020-11-25  8:11     ` Andrey Gruzdev
2020-11-25 18:43       ` Dr. David Alan Gilbert
2020-11-25 19:17         ` Andrey Gruzdev
2020-11-19 12:59 ` [PATCH v3 3/7] support UFFD write fault processing in ram_save_iterate() Andrey Gruzdev via
2020-11-19 18:25   ` Peter Xu
2020-11-20 10:44     ` Andrey Gruzdev
2020-11-20 15:07       ` Peter Xu
2020-11-20 16:15         ` Andrey Gruzdev
2020-11-20 16:43           ` Peter Xu
2020-11-20 16:53             ` Andrey Gruzdev
2020-11-23 21:34               ` Peter Xu
2020-11-24  8:02                 ` Andrey Gruzdev
2020-11-24 15:17                   ` Peter Xu
2020-11-24 17:40                     ` Andrey Gruzdev
2020-11-25 13:08   ` Dr. David Alan Gilbert
2020-11-25 14:40     ` Andrey Gruzdev
2020-11-25 18:41       ` Dr. David Alan Gilbert
2020-11-25 19:12         ` Andrey Gruzdev
2020-11-19 12:59 ` Andrey Gruzdev via [this message]
2020-11-19 18:47   ` [PATCH v3 4/7] implementation of write-tracking migration thread Peter Xu
2020-11-20 11:41     ` Andrey Gruzdev
2020-11-19 12:59 ` [PATCH v3 5/7] implementation of vm_start() BH Andrey Gruzdev via
2020-11-19 18:46   ` Peter Xu
2020-11-20 11:13     ` Andrey Gruzdev
2020-11-19 12:59 ` [PATCH v3 6/7] the rest of write tracking migration code Andrey Gruzdev via
2020-11-19 12:59 ` [PATCH v3 7/7] introduce simple linear scan rate limiting mechanism Andrey Gruzdev via
2020-11-19 20:02   ` Peter Xu
2020-11-20 12:06     ` Andrey Gruzdev
2020-11-20 15:23       ` Peter Xu
2020-11-24 16:41 ` [PATCH v3 0/7] UFFD write-tracking migration/snapshots Dr. David Alan Gilbert

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201119125940.20017-5-andrey.gruzdev@virtuozzo.com \
    --to=qemu-devel@nongnu.org \
    --cc=andrey.gruzdev@virtuozzo.com \
    --cc=armbru@redhat.com \
    --cc=den@openvz.org \
    --cc=dgilbert@redhat.com \
    --cc=eblake@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=peterx@redhat.com \
    --cc=quintela@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).