From mboxrd@z Thu Jan  1 00:00:00 1970
Received: from eggs.gnu.org ([2001:4830:134:3::10]:44043)
	by lists.gnu.org with esmtp (Exim 4.71)
	(envelope-from <zhang.zhanghailiang@huawei.com>) id 1ZX3LH-0005pX-MY
	for qemu-devel@nongnu.org; Wed, 02 Sep 2015 04:24:28 -0400
Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71)
	(envelope-from <zhang.zhanghailiang@huawei.com>) id 1ZX3LG-00015e-FW
	for qemu-devel@nongnu.org; Wed, 02 Sep 2015 04:24:27 -0400
Received: from szxga01-in.huawei.com ([58.251.152.64]:28187)
	by eggs.gnu.org with esmtp (Exim 4.71)
	(envelope-from <zhang.zhanghailiang@huawei.com>) id 1ZX3LE-00014m-Pr
	for qemu-devel@nongnu.org; Wed, 02 Sep 2015 04:24:26 -0400
From: zhanghailiang <zhang.zhanghailiang@huawei.com>
Date: Wed, 2 Sep 2015 16:23:06 +0800
Message-ID: <1441182199-8328-20-git-send-email-zhang.zhanghailiang@huawei.com>
In-Reply-To: <1441182199-8328-1-git-send-email-zhang.zhanghailiang@huawei.com>
References: <1441182199-8328-1-git-send-email-zhang.zhanghailiang@huawei.com>
MIME-Version: 1.0
Content-Type: text/plain
Subject: [Qemu-devel] [PATCH COLO-Frame v9 19/32] COLO: Implement failover
	work for Primary VM
List-Id: <qemu-devel.nongnu.org>
List-Unsubscribe: <https://lists.nongnu.org/mailman/options/qemu-devel>,
	<mailto:qemu-devel-request@nongnu.org?subject=unsubscribe>
List-Archive: <http://lists.nongnu.org/archive/html/qemu-devel>
List-Post: <mailto:qemu-devel@nongnu.org>
List-Help: <mailto:qemu-devel-request@nongnu.org?subject=help>
List-Subscribe: <https://lists.nongnu.org/mailman/listinfo/qemu-devel>,
	<mailto:qemu-devel-request@nongnu.org?subject=subscribe>
To: qemu-devel@nongnu.org
Cc: lizhijian@cn.fujitsu.com, quintela@redhat.com, yunhong.jiang@intel.com, eddie.dong@intel.com, peter.huangpeng@huawei.com, dgilbert@redhat.com, arei.gonglei@huawei.com, stefanha@redhat.com, amit.shah@redhat.com, yanghy@cn.fujitsu.com, zhanghailiang <zhang.zhanghailiang@huawei.com>

For the Primary VM (PVM), if there is a failover request from the user,
the COLO thread exits its loop, while the failover BH does the
cleanup work and resumes the VM.

Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
---
 include/migration/colo.h     |  4 +++
 include/migration/failover.h |  1 +
 migration/colo-failover.c    |  7 ++++-
 migration/colo.c             | 64 +++++++++++++++++++++++++++++++++++++++-----
 4 files changed, 69 insertions(+), 7 deletions(-)

diff --git a/include/migration/colo.h b/include/migration/colo.h
index 21f5c5d..5f6072e 100644
--- a/include/migration/colo.h
+++ b/include/migration/colo.h
@@ -37,4 +37,8 @@ int get_colo_mode(void);
 int colo_init_ram_cache(void);
 void colo_release_ram_cache(void);
 void colo_flush_ram_cache(void);
+
+/* failover */
+void colo_do_failover(MigrationState *s);
+
 #endif
diff --git a/include/migration/failover.h b/include/migration/failover.h
index 882c625..fba3931 100644
--- a/include/migration/failover.h
+++ b/include/migration/failover.h
@@ -26,5 +26,6 @@ void failover_init_state(void);
 int failover_set_state(int old_state, int new_state);
 int failover_get_state(void);
 void failover_request_active(Error **errp);
+bool failover_request_is_active(void);
 
 #endif
diff --git a/migration/colo-failover.c b/migration/colo-failover.c
index e055551..7299674 100644
--- a/migration/colo-failover.c
+++ b/migration/colo-failover.c
@@ -32,7 +32,7 @@ static void colo_failover_bh(void *opaque)
         error_report(" Unkown error for failover, old_state=%d", old_state);
         return;
     }
-    /*TODO: Do failover work */
+    colo_do_failover(NULL);
 }
 
 void failover_request_active(Error **errp)
@@ -67,6 +67,11 @@ int failover_get_state(void)
     return atomic_read(&failover_state);
 }
 
+bool failover_request_is_active(void)
+{
+    return ((failover_get_state() != FAILOVER_STATUS_NONE));
+}
+
 void qmp_colo_lost_heartbeat(Error **errp)
 {
     if (get_colo_mode() == COLO_MODE_UNKNOWN) {
diff --git a/migration/colo.c b/migration/colo.c
index 6c48dca..6aa6d18 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -48,6 +48,45 @@ bool migration_incoming_in_colo_state(void)
     return (mis && (mis->state == MIGRATION_STATUS_COLO));
 }
 
+static bool colo_runstate_is_stopped(void)
+{
+    return runstate_check(RUN_STATE_COLO) || !runstate_is_running();
+}
+
+static void primary_vm_do_failover(void)
+{
+    MigrationState *s = migrate_get_current();
+    int old_state;
+
+    if (s->state != MIGRATION_STATUS_FAILED) {
+        migrate_set_state(&s->state, MIGRATION_STATUS_COLO,
+                          MIGRATION_STATUS_COMPLETED);
+    }
+    qemu_bh_schedule(s->cleanup_bh);
+
+    vm_start();
+
+    old_state = failover_set_state(FAILOVER_STATUS_HANDLING,
+                                   FAILOVER_STATUS_COMPLETED);
+    if (old_state != FAILOVER_STATUS_HANDLING) {
+        error_report("Serious error while do failover for Primary VM,"
+                     "old_state: %d", old_state);
+        return;
+    }
+}
+
+void colo_do_failover(MigrationState *s)
+{
+    /* Make sure vm stopped while failover */
+    if (!colo_runstate_is_stopped()) {
+        vm_stop_force_state(RUN_STATE_COLO);
+    }
+
+    if (get_colo_mode() == COLO_MODE_PRIMARY) {
+        primary_vm_do_failover();
+    }
+}
+
 /* colo checkpoint control helper */
 static int colo_ctl_put(QEMUFile *f, uint32_t cmd, uint64_t value)
 {
@@ -132,9 +171,23 @@ static int colo_do_checkpoint_transaction(MigrationState *s,
 
     /* suspend and save vm state to colo buffer */
     qemu_mutex_lock_iothread();
+    if (failover_request_is_active()) {
+        qemu_mutex_unlock_iothread();
+        ret = -1;
+        goto out;
+    }
+    /* suspend and save vm state to colo buffer */
     vm_stop_force_state(RUN_STATE_COLO);
     qemu_mutex_unlock_iothread();
     trace_colo_vm_state_change("run", "stop");
+    /*
+     * failover request bh could be called after
+     * vm_stop_force_state so we check failover_request_is_active() again.
+     */
+    if (failover_request_is_active()) {
+        ret = -1;
+        goto out;
+    }
 
     /* Disable block migration */
     s->params.blk = 0;
@@ -234,6 +287,11 @@ static void *colo_thread(void *opaque)
     trace_colo_vm_state_change("stop", "run");
 
     while (s->state == MIGRATION_STATUS_COLO) {
+        if (failover_request_is_active()) {
+            error_report("failover request");
+            goto out;
+        }
+
         current_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
         if (current_time - checkpoint_time < CHECKPOINT_MAX_PEROID) {
             g_usleep(100000);
@@ -251,8 +309,6 @@ out:
     if (ret < 0) {
         error_report("Detect some error: %s", strerror(-ret));
     }
-    migrate_set_state(&s->state, MIGRATION_STATUS_COLO,
-                      MIGRATION_STATUS_COMPLETED);
 
     qsb_free(buffer);
     buffer = NULL;
@@ -261,10 +317,6 @@ out:
         qemu_fclose(s->from_dst_file);
     }
 
-    qemu_mutex_lock_iothread();
-    qemu_bh_schedule(s->cleanup_bh);
-    qemu_mutex_unlock_iothread();
-
     return NULL;
 }
 
-- 
1.8.3.1