xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
From: Wen Congyang <wency@cn.fujitsu.com>
To: xen devel <xen-devel@lists.xen.org>
Cc: Ian Campbell <Ian.Campbell@citrix.com>,
	Wen Congyang <wency@cn.fujitsu.com>,
	Ian Jackson <Ian.Jackson@eu.citrix.com>,
	Jiang Yunhong <yunhong.jiang@intel.com>,
	Dong Eddie <eddie.dong@intel.com>,
	Yang Hongyang <yanghy@cn.fujitsu.com>,
	Lai Jiangshan <laijs@cn.fujitsu.com>
Subject: [RFC Patch v2 19/45] primary vm suspend/get_dirty_pfn/resume/checkpoint code
Date: Fri, 8 Aug 2014 15:01:18 +0800	[thread overview]
Message-ID: <1407481305-19808-20-git-send-email-wency@cn.fujitsu.com> (raw)
In-Reply-To: <1407481305-19808-1-git-send-email-wency@cn.fujitsu.com>

We will do the following things again and again:
1. Suspend primary vm
   a. Suspend primary vm
   b. do postsuspend
   c. Read LIBXL_COLO_SVM_SUSPENDED to master
   d. Read secondary vm's dirty page information to master(count + pfn list)
2. Get dirty pfn list
   a. Return secondary vm's dirty pfn list
3. Resume primary vm
   a. Read LIBXL_COLO_SVM_READY from slave
   b. Do presume
   c. Resume primary vm
   d. Read LIBXL_COLO_SVM_RESUMED from slave
4. Wait a new checkpoint
    a. Wait a new checkpoint(not implemented)
    b. Send LIBXL_COLO_NEW_CHECKPOINT to slave

Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
---
 tools/libxc/xenguest.h             |  12 +
 tools/libxl/Makefile               |   2 +-
 tools/libxl/libxl.c                |   6 +-
 tools/libxl/libxl_colo.h           |  10 +
 tools/libxl/libxl_colo_save.c      | 608 +++++++++++++++++++++++++++++++++++++
 tools/libxl/libxl_dom.c            |  13 +-
 tools/libxl/libxl_internal.h       |  32 +-
 tools/libxl/libxl_save_msgs_gen.pl |   1 +
 tools/libxl/libxl_types.idl        |   1 +
 9 files changed, 677 insertions(+), 8 deletions(-)
 create mode 100644 tools/libxl/libxl_colo_save.c

diff --git a/tools/libxc/xenguest.h b/tools/libxc/xenguest.h
index d3061c7..1aeaad2 100644
--- a/tools/libxc/xenguest.h
+++ b/tools/libxc/xenguest.h
@@ -72,6 +72,18 @@ struct save_callbacks {
      */
     int (*toolstack_save)(uint32_t domid, uint8_t **buf, uint32_t *len, void *data);
 
+    /* Called after the guest is suspended.
+     *
+     * returns the list of dirty pfn:
+     *  struct {
+     *      uint64_t count;
+     *      uint64_t pfn[];
+     *  };
+     *
+     *  Note: the caller must free the return value.
+     */
+    uint8_t *(*get_dirty_pfn)(void *data);
+
     /* to be provided as the last argument to each callback function */
     void* data;
 };
diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile
index c026bdd..1c32ae2 100644
--- a/tools/libxl/Makefile
+++ b/tools/libxl/Makefile
@@ -57,7 +57,7 @@ LIBXL_OBJS-y += libxl_nonetbuffer.o
 endif
 
 LIBXL_OBJS-y += libxl_remus.o libxl_checkpoint_device.o libxl_remus_disk_drbd.o
-LIBXL_OBJS-y += libxl_colo_restore.o
+LIBXL_OBJS-y += libxl_colo_restore.o libxl_colo_save.o
 
 LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o
 LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o
diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
index f35029a..e0817e8 100644
--- a/tools/libxl/libxl.c
+++ b/tools/libxl/libxl.c
@@ -18,6 +18,7 @@
 
 #include "libxl_internal.h"
 #include "libxl_remus.h"
+#include "libxl_colo.h"
 
 #define PAGE_TO_MEMKB(pages) ((pages) * 4)
 #define BACKEND_STRING_SIZE 5
@@ -820,7 +821,10 @@ int libxl_domain_remus_start(libxl_ctx *ctx, libxl_domain_remus_info *info,
     assert(info);
 
     /* Point of no return */
-    libxl__remus_setup(egc, &dss->rs);
+    if (info->colo)
+        libxl__colo_save_setup(egc, &dss->css);
+    else
+        libxl__remus_setup(egc, &dss->rs);
     return AO_INPROGRESS;
 
  out:
diff --git a/tools/libxl/libxl_colo.h b/tools/libxl/libxl_colo.h
index 91df275..26a2563 100644
--- a/tools/libxl/libxl_colo.h
+++ b/tools/libxl/libxl_colo.h
@@ -35,4 +35,14 @@ extern void libxl__colo_restore_teardown(libxl__egc *egc,
                                          libxl__colo_restore_state *crs,
                                          int rc);
 
+extern void libxl__colo_save_domain_suspend_callback(void *data);
+extern void libxl__colo_save_domain_resume_callback(void *data);
+extern void libxl__colo_save_domain_checkpoint_callback(void *data);
+extern void libxl__colo_save_get_dirty_pfn_callback(void *data);
+extern void libxl__colo_save_setup(libxl__egc *egc,
+                                   libxl__colo_save_state *css);
+extern void libxl__colo_save_teardown(libxl__egc *egc,
+                                      libxl__colo_save_state *css,
+                                      int rc);
+
 #endif
diff --git a/tools/libxl/libxl_colo_save.c b/tools/libxl/libxl_colo_save.c
new file mode 100644
index 0000000..6675d3d
--- /dev/null
+++ b/tools/libxl/libxl_colo_save.c
@@ -0,0 +1,608 @@
+/*
+ * Copyright (C) 2014 FUJITSU LIMITED
+ * Author: Wen Congyang <wency@cn.fujitsu.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; version 2.1 only. with the special
+ * exception on linking described in file LICENSE.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ */
+
+#include "libxl_osdeps.h" /* must come before any other headers */
+
+#include "libxl_internal.h"
+#include "libxl_colo.h"
+
+static const libxl__checkpoint_device_subkind_ops *colo_ops[] = {
+    NULL,
+};
+
+/* ================= colo: setup save environment ================= */
+static void colo_save_setup_done(libxl__egc *egc,
+                                 libxl__checkpoint_devices_state *cds,
+                                 int rc);
+static void colo_save_setup_failed(libxl__egc *egc,
+                                   libxl__checkpoint_devices_state *cds,
+                                   int rc);
+
+void libxl__colo_save_setup(libxl__egc *egc, libxl__colo_save_state *css)
+{
+    libxl__domain_suspend_state *dss = CONTAINER_OF(css, *dss, css);
+
+    /* Convenience aliases */
+    libxl__checkpoint_devices_state *const cds = &css->cds;
+
+    STATE_AO_GC(dss->ao);
+
+    if (dss->type != LIBXL_DOMAIN_TYPE_HVM) {
+        LOG(ERROR, "COLO only supports hvm now");
+        goto out;
+    }
+
+    css->send_fd = dss->fd;
+    css->recv_fd = dss->recv_fd;
+    css->svm_running = false;
+
+    /* TODO: disk/nic support */
+    cds->device_kind_flags = 0;
+    cds->ops = colo_ops;
+    cds->callback = colo_save_setup_done;
+    cds->ao = ao;
+    cds->domid = dss->domid;
+
+    libxl__checkpoint_devices_setup(egc, &css->cds);
+
+    return;
+
+out:
+    libxl__ao_complete(egc, ao, ERROR_FAIL);
+}
+
+static void colo_save_setup_done(libxl__egc *egc,
+                                 libxl__checkpoint_devices_state *cds,
+                                 int rc)
+{
+    libxl__colo_save_state *css = CONTAINER_OF(cds, *css, cds);
+    libxl__domain_suspend_state *dss = CONTAINER_OF(css, *dss, css);
+    STATE_AO_GC(cds->ao);
+
+    if (!rc) {
+        libxl__domain_suspend(egc, dss);
+        return;
+    }
+
+    LOG(ERROR, "COLO: failed to setup device for guest with domid %u",
+        dss->domid);
+    css->cds.callback = colo_save_setup_failed;
+    libxl__checkpoint_devices_teardown(egc, &css->cds);
+}
+
+static void colo_save_setup_failed(libxl__egc *egc,
+                                   libxl__checkpoint_devices_state *cds,
+                                   int rc)
+{
+    STATE_AO_GC(cds->ao);
+
+    if (rc)
+        LOG(ERROR, "COLO: failed to teardown device after setup failed"
+            " for guest with domid %u, rc %d", cds->domid, rc);
+
+    libxl__ao_complete(egc, ao, rc);
+}
+
+
+/* ================= colo: teardown save environment ================= */
+static void colo_teardown_done(libxl__egc *egc,
+                               libxl__checkpoint_devices_state *cds,
+                               int rc);
+
+void libxl__colo_save_teardown(libxl__egc *egc,
+                               libxl__colo_save_state *css,
+                               int rc)
+{
+    libxl__domain_suspend_state *dss = CONTAINER_OF(css, *dss, css);
+
+    STATE_AO_GC(css->cds.ao);
+
+    LOG(WARN, "COLO: Domain suspend terminated with rc %d,"
+        " teardown COLO devices...", rc);
+    dss->css.cds.callback = colo_teardown_done;
+    libxl__checkpoint_devices_teardown(egc, &dss->css.cds);
+    return;
+}
+
+static void colo_teardown_done(libxl__egc *egc,
+                               libxl__checkpoint_devices_state *cds,
+                               int rc)
+{
+    libxl__colo_save_state *css = CONTAINER_OF(cds, *css, cds);
+    libxl__domain_suspend_state *dss = CONTAINER_OF(css, *dss, css);
+    dss->callback(egc, dss, rc);
+}
+
+/*
+ * checkpoint callbacks are called in the following order:
+ * 1. suspend
+ * 2. resume
+ * 3. checkpoint
+ */
+static void colo_common_read_done(libxl__egc *egc,
+                                  libxl__datareader_state *drs,
+                                  ssize_t real_size, int errnoval);
+/* ===================== colo: suspend primary vm ===================== */
+/*
+ * Do the following things when suspending primary vm:
+ * 1. suspend primary vm
+ * 2. do postsuspend
+ * 3. read LIBXL_COLO_SVM_SUSPENDED
+ * 4. read secondary vm's dirty pages
+ */
+static void colo_suspend_primary_vm_done(libxl__egc *egc,
+                                         libxl__domain_suspend_state2 *dss2,
+                                         int ok);
+static void colo_postsuspend_cb(libxl__egc *egc,
+                                libxl__checkpoint_devices_state *cds,
+                                int rc);
+static void colo_read_pfn(libxl__egc *egc, libxl__colo_save_state *css);
+
+void libxl__colo_save_domain_suspend_callback(void *data)
+{
+    libxl__save_helper_state *shs = data;
+    libxl__egc *egc = shs->egc;
+    libxl__domain_suspend_state *dss = CONTAINER_OF(shs, *dss, shs);
+
+    /* Convenience aliases */
+    libxl__domain_suspend_state2 *dss2 = &dss->dss2;
+
+    dss2->callback_common_done = colo_suspend_primary_vm_done;
+    libxl__domain_suspend2(egc, dss2);
+}
+
+static void colo_suspend_primary_vm_done(libxl__egc *egc,
+                                         libxl__domain_suspend_state2 *dss2,
+                                         int ok)
+{
+    libxl__domain_suspend_state *dss = CONTAINER_OF(dss2, *dss, dss2);
+
+    STATE_AO_GC(dss2->ao);
+
+    if (!ok) {
+        LOG(ERROR, "cannot suspend primary vm");
+        goto out;
+    }
+
+    /* Convenience aliases */
+    libxl__checkpoint_devices_state *const cds = &dss->css.cds;
+
+    cds->callback = colo_postsuspend_cb;
+    libxl__checkpoint_devices_postsuspend(egc, cds);
+    return;
+
+out:
+    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, ok);
+}
+
+static void colo_postsuspend_cb(libxl__egc *egc,
+                                libxl__checkpoint_devices_state *cds,
+                                int rc)
+{
+    int ok = 0;
+    libxl__colo_save_state *css = CONTAINER_OF(cds, *css, cds);
+    libxl__domain_suspend_state *dss = CONTAINER_OF(css, *dss, css);
+
+    /* Convenience aliases */
+    libxl__datareader_state *const drs = &css->drs;
+
+    STATE_AO_GC(cds->ao);
+
+    if (rc) {
+        LOG(ERROR, "postsuspend fails");
+        goto out;
+    }
+
+    if (!css->svm_running) {
+        ok = 1;
+        goto out;
+    }
+
+    /*
+     * read LIBXL_COLO_SVM_SUSPENDED and the count of
+     * secondary vm's dirty pages.
+     */
+    memset(drs, 0, sizeof(*drs));
+    drs->ao = ao;
+    drs->readfd = css->recv_fd;
+    drs->readsize = sizeof(css->temp_buff);
+    drs->readwhat = "colo stream";
+    drs->callback = colo_common_read_done;
+    drs->buf = css->temp_buff;
+    css->callback = colo_read_pfn;
+
+    if (libxl__datareader_start(drs)) {
+        LOG(ERROR, "libxl__datareader_start() fails");
+        goto out;
+    }
+
+    return;
+
+out:
+    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, ok);
+}
+
+static void colo_read_pfn(libxl__egc *egc, libxl__colo_save_state *css)
+{
+    int ok = 0;
+    libxl__domain_suspend_state *dss = CONTAINER_OF(css, *dss, css);
+    STATE_AO_GC(css->cds.ao);
+
+    /* Convenience aliases */
+    libxl__datareader_state *const drs = &css->drs;
+
+    assert(!css->buff);
+    css->section = css->temp_buff[0];
+    css->count = *(uint64_t *)(&css->temp_buff[1]);
+
+    if (css->section != LIBXL_COLO_SVM_SUSPENDED) {
+        LOG(ERROR, "invalid section: %d, expected: %d",
+            css->section, LIBXL_COLO_SVM_SUSPENDED);
+        goto out;
+    }
+
+    css->buff = libxl__zalloc(NOGC, sizeof(uint64_t) * (css->count + 1));
+    css->buff[0] = css->count;
+
+    if (css->count == 0) {
+        /* no dirty pages */
+        ok = 1;
+        goto out;
+    }
+
+    /* read the pfn of secondary vm's dirty pages */
+    memset(drs, 0, sizeof(*drs));
+    drs->ao = ao;
+    drs->readfd = css->recv_fd;
+    drs->readsize = css->count * sizeof(uint64_t);
+    drs->readwhat = "colo stream";
+    drs->callback = colo_common_read_done;
+    drs->buf = css->buff + 1;
+    css->callback = NULL;
+
+    if (libxl__datareader_start(drs)) {
+        LOG(ERROR, "libxl__datareader_start() fails");
+        goto out;
+    }
+
+    return;
+
+out:
+    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, ok);
+}
+
+
+/* ===================== colo: get dirty pfn ===================== */
+void libxl__colo_save_get_dirty_pfn_callback(void *data)
+{
+    libxl__save_helper_state *shs = data;
+    libxl__egc *egc = shs->egc;
+    libxl__domain_suspend_state *dss = CONTAINER_OF(shs, *dss, shs);
+    uint64_t size;
+
+    /* Convenience aliases */
+    libxl__colo_save_state *const css = &dss->css;
+
+    assert(css->buff);
+    size = sizeof(uint64_t) * (css->count + 1);
+
+    libxl__xc_domain_saverestore_async_callback_done_with_data(egc, shs,
+                                                               (uint8_t *)css->buff,
+                                                               size);
+    free(css->buff);
+    css->buff = NULL;
+}
+
+
+/* ===================== colo: resume primary vm ===================== */
+/*
+ * Do the following things when resuming primary vm:
+ *  1. read LIBXL_COLO_SVM_READY
+ *  2. do preresume
+ *  3. resume primary vm
+ *  4. read LIBXL_COLO_SVM_RESUMED
+ */
+static void colo_preresume_dm_saved(libxl__egc *egc,
+                                    libxl__domain_suspend_state *dss, int rc);
+static void colo_read_svm_ready_done(libxl__egc *egc,
+                                     libxl__colo_save_state *css);
+static void colo_preresume_cb(libxl__egc *egc,
+                              libxl__checkpoint_devices_state *cds,
+                              int rc);
+static void colo_read_svm_resumed_done(libxl__egc *egc,
+                                       libxl__colo_save_state *css);
+
+void libxl__colo_save_domain_resume_callback(void *data)
+{
+    libxl__save_helper_state *shs = data;
+    libxl__egc *egc = shs->egc;
+    libxl__domain_suspend_state *dss = CONTAINER_OF(shs, *dss, shs);
+
+    /* This would go into tailbuf. */
+    if (dss->hvm) {
+        libxl__domain_save_device_model(egc, dss, colo_preresume_dm_saved);
+    } else {
+        colo_preresume_dm_saved(egc, dss, 0);
+    }
+
+    return;
+}
+
+static void colo_preresume_dm_saved(libxl__egc *egc,
+                                    libxl__domain_suspend_state *dss, int rc)
+{
+    /* Convenience aliases */
+    libxl__colo_save_state *const css = &dss->css;
+    libxl__datareader_state *const drs = &css->drs;
+
+    STATE_AO_GC(css->cds.ao);
+
+    if (rc) {
+        LOG(ERROR, "Failed to save device model. Terminating COLO..");
+        goto out;
+    }
+
+    /* read LIBXL_COLO_SVM_READY */
+    memset(drs, 0, sizeof(*drs));
+    drs->ao = ao;
+    drs->readfd = css->recv_fd;
+    drs->readsize = sizeof(css->section);
+    drs->readwhat = "colo stream";
+    drs->callback = colo_common_read_done;
+    drs->buf = &css->section;
+    css->callback = colo_read_svm_ready_done;
+
+    if (libxl__datareader_start(drs)) {
+        LOG(ERROR, "libxl__datareader_start() fails");
+        goto out;
+    }
+
+    return;
+
+out:
+    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, 0);
+}
+
+static void colo_read_svm_ready_done(libxl__egc *egc,
+                                     libxl__colo_save_state *css)
+{
+    libxl__domain_suspend_state *dss = CONTAINER_OF(css, *dss, css);
+
+    STATE_AO_GC(css->cds.ao);
+
+    if (css->section != LIBXL_COLO_SVM_READY) {
+        LOG(ERROR, "invalid section: %d, expected: %d",
+            css->section, LIBXL_COLO_SVM_READY);
+        goto out;
+    }
+
+    css->svm_running = true;
+    css->cds.callback = colo_preresume_cb;
+    libxl__checkpoint_devices_preresume(egc, &css->cds);
+
+    return;
+
+out:
+    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, 0);
+}
+
+static void colo_preresume_cb(libxl__egc *egc,
+                              libxl__checkpoint_devices_state *cds,
+                              int rc)
+{
+    libxl__colo_save_state *css = CONTAINER_OF(cds, *css, cds);
+    libxl__domain_suspend_state *dss = CONTAINER_OF(css, *dss, css);
+
+    /* Convenience aliases */
+    libxl__datareader_state *const drs = &css->drs;
+
+    STATE_AO_GC(cds->ao);
+
+    if (rc) {
+        LOG(ERROR, "preresume fails");
+        goto out;
+    }
+
+    /* Resumes the domain and the device model */
+    if (libxl__domain_resume(gc, dss->domid, /* Fast Suspend */1, 0)) {
+        LOG(ERROR, "cannot resume primary vm");
+        goto out;
+    }
+
+    /* read LIBXL_COLO_SVM_RESUMED */
+    memset(drs, 0, sizeof(*drs));
+    drs->ao = ao;
+    drs->readfd = css->recv_fd;
+    drs->readsize = sizeof(css->section);
+    drs->readwhat = "colo stream";
+    drs->callback = colo_common_read_done;
+    drs->buf = &css->section;
+    css->callback = colo_read_svm_resumed_done;
+
+    if (libxl__datareader_start(drs)) {
+        LOG(ERROR, "libxl__datareader_start() fails");
+        goto out;
+    }
+
+    return;
+
+out:
+    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, 0);
+}
+
+static void colo_read_svm_resumed_done(libxl__egc *egc,
+                                       libxl__colo_save_state *css)
+{
+    int ok = 0;
+    libxl__domain_suspend_state *dss = CONTAINER_OF(css, *dss, css);
+
+    STATE_AO_GC(css->cds.ao);
+
+    if (css->section != LIBXL_COLO_SVM_RESUMED) {
+        LOG(ERROR, "invalid section: %d, expected: %d",
+            css->section, LIBXL_COLO_SVM_RESUMED);
+        goto out;
+    }
+
+    ok = 1;
+
+out:
+    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, ok);
+}
+
+
+/* ===================== colo: wait new checkpoint ===================== */
+/*
+ * Do the following things:
+ * 1. do commit
+ * 2. wait for a new checkpoint
+ * 3. write LIBXL_COLO_NEW_CHECKPOINT
+ */
+static void colo_device_commit_cb(libxl__egc *egc,
+                                  libxl__checkpoint_devices_state *cds,
+                                  int rc);
+static void colo_start_new_checkpoint(libxl__egc *egc,
+                                      libxl__checkpoint_devices_state *cds,
+                                      int rc);
+static void colo_send_data_done(libxl__egc *egc,
+                                libxl__datacopier_state *dc,
+                                int onwrite, int errnoval);
+
+void libxl__colo_save_domain_checkpoint_callback(void *data)
+{
+    libxl__save_helper_state *shs = data;
+    libxl__domain_suspend_state *dss = CONTAINER_OF(shs, *dss, shs);
+    libxl__egc *egc = dss->shs.egc;
+
+    /* Convenience aliases */
+    libxl__checkpoint_devices_state *const cds = &dss->css.cds;
+
+    cds->callback = colo_device_commit_cb;
+    libxl__checkpoint_devices_commit(egc, cds);
+}
+
+static void colo_device_commit_cb(libxl__egc *egc,
+                                  libxl__checkpoint_devices_state *cds,
+                                  int rc)
+{
+    libxl__colo_save_state *css = CONTAINER_OF(cds, *css, cds);
+    libxl__domain_suspend_state *dss = CONTAINER_OF(css, *dss, css);
+
+    STATE_AO_GC(cds->ao);
+
+    if (rc) {
+        LOG(ERROR, "commit fails");
+        goto out;
+    }
+
+    /* TODO: wait a new checkpoint */
+    colo_start_new_checkpoint(egc, cds, 0);
+    return;
+
+out:
+    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, 0);
+}
+
+static void colo_start_new_checkpoint(libxl__egc *egc,
+                                      libxl__checkpoint_devices_state *cds,
+                                      int rc)
+{
+    libxl__colo_save_state *css = CONTAINER_OF(cds, *css, cds);
+    libxl__domain_suspend_state *dss = CONTAINER_OF(css, *dss, css);
+    uint8_t section = LIBXL_COLO_NEW_CHECKPOINT;
+
+    /* Convenience aliases */
+    libxl__datacopier_state *const dc = &css->dc;
+
+    STATE_AO_GC(cds->ao);
+
+    if (rc)
+        goto out;
+
+    /* write LIBXL_COLO_NEW_CHECKPOINT */
+    memset(dc, 0, sizeof(*dc));
+    dc->ao = ao;
+    dc->readfd = -1;
+    dc->writefd = css->send_fd;
+    dc->maxsz = INT_MAX;
+    dc->copywhat = "new checkpoint is triggered";
+    dc->writewhat = "colo stream";
+    dc->callback = colo_send_data_done;
+
+    rc = libxl__datacopier_start(dc);
+    if (rc) {
+        LOG(ERROR, "libxl__datacopier_start() fails");
+        goto out;
+    }
+
+    /* tell slave that a new checkpoint is triggered */
+    libxl__datacopier_prefixdata(egc, dc, &section, sizeof(section));
+    return;
+
+out:
+    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, 0);
+}
+
+static void colo_send_data_done(libxl__egc *egc,
+                                libxl__datacopier_state *dc,
+                                int onwrite, int errnoval)
+{
+    libxl__colo_save_state *css = CONTAINER_OF(dc, *css, dc);
+    libxl__domain_suspend_state *dss = CONTAINER_OF(css, *dss, css);
+    int ok;
+
+    STATE_AO_GC(dc->ao);
+
+    if (onwrite == -1 || errnoval) {
+        LOG(ERROR, "cannot start a new checkpoint");
+        ok = 0;
+        goto out;
+    }
+
+    /* Everything is OK */
+    ok = 1;
+
+out:
+    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, ok);
+}
+
+
+/* ===================== colo: common callback ===================== */
+static void colo_common_read_done(libxl__egc *egc,
+                                  libxl__datareader_state *drs,
+                                  ssize_t real_size, int errnoval)
+{
+    int ok = 0;
+    libxl__colo_save_state *css = CONTAINER_OF(drs, *css, drs);
+    libxl__domain_suspend_state *dss = CONTAINER_OF(css, *dss, css);
+    STATE_AO_GC(drs->ao);
+
+    if (real_size < drs->readsize) {
+        LOG(ERROR, "reading data fails: %lld", (long long)real_size);
+        goto out;
+    }
+
+    if (!css->callback) {
+        /* Everything is OK */
+        ok = 1;
+        goto out;
+    }
+
+    css->callback(egc, css);
+    return;
+
+out:
+    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, ok);
+}
diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index 769952c..450eb39 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -20,6 +20,7 @@
 #include "libxl_internal.h"
 #include "libxl_arch.h"
 #include "libxl_remus.h"
+#include "libxl_colo.h"
 
 #include <xc_dom.h>
 #include <xen/hvm/hvm_info_table.h>
@@ -1624,7 +1625,12 @@ void libxl__domain_suspend(libxl__egc *egc, libxl__domain_suspend_state *dss)
     }
 
     memset(callbacks, 0, sizeof(*callbacks));
-    if (r_info != NULL) {
+    if (r_info != NULL && r_info->colo) {
+        callbacks->suspend = libxl__colo_save_domain_suspend_callback;
+        callbacks->postcopy = libxl__colo_save_domain_resume_callback;
+        callbacks->checkpoint = libxl__colo_save_domain_checkpoint_callback;
+        callbacks->get_dirty_pfn = libxl__colo_save_get_dirty_pfn_callback;
+    } else if (r_info != NULL) {
         callbacks->suspend = libxl__remus_domain_suspend_callback;
         callbacks->postcopy = libxl__remus_domain_resume_callback;
         callbacks->checkpoint = libxl__remus_domain_checkpoint_callback;
@@ -1785,7 +1791,10 @@ static void domain_suspend_done(libxl__egc *egc,
         xc_suspend_evtchn_release(CTX->xch, CTX->xce, domid,
                            dss2->guest_evtchn.port, &dss2->guest_evtchn_lockfd);
 
-    if (dss->remus) {
+    if (dss->remus && dss->remus->colo) {
+        libxl__colo_save_teardown(egc, &dss->css, rc);
+        return;
+    } else if (dss->remus) {
         libxl__remus_teardown(egc, &dss->rs, rc);
         return;
     }
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index d2e3176..aebc972 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -2521,7 +2521,7 @@ typedef struct libxl__save_helper_state {
 /*
  * The abstract checkpoint device layer exposes a common
  * set of API to [external] libxl for manipulating devices attached to
- * a guest protected by Remus. The device layer also exposes a set of
+ * a guest protected by Remus/COLO. The device layer also exposes a set of
  * [internal] interfaces that every device type must implement.
  *
  * The following API are exposed to libxl:
@@ -2539,7 +2539,7 @@ typedef struct libxl__save_helper_state {
  *  +libxl__checkpoint_devices_commit
  *
  * Each device type needs to implement the interfaces specified in
- * the libxl__checkpoint_device_subkind_ops if it wishes to support Remus.
+ * the libxl__checkpoint_device_subkind_ops if it wishes to support Remus/COLO.
  *
  * The high-level control flow through the checkpoint device layer is shown
  * below:
@@ -2564,7 +2564,7 @@ typedef struct libxl__checkpoint_device_subkind_ops libxl__checkpoint_device_sub
 
 /*
  * Interfaces to be implemented by every device type that wishes to
- * support Remus. Functions must be implemented unless otherwise
+ * support Remus/COLO. Functions must be implemented unless otherwise
  * stated. Many of these functions are asynchronous. They call
  * dev->aodev.callback when done.  The actual implementations may be
  * synchronous and call dev->aodev.callback directly (as the last
@@ -2719,6 +2719,25 @@ struct libxl__remus_state {
 
 _hidden int libxl__netbuffer_enabled(libxl__gc *gc);
 
+/*----- colo related state structure -----*/
+typedef struct libxl__colo_save_state libxl__colo_save_state;
+struct libxl__colo_save_state {
+    libxl__checkpoint_devices_state cds;
+    int send_fd;
+    int recv_fd;
+
+    /* private */
+    libxl__datacopier_state dc;
+    libxl__datareader_state drs;
+    uint8_t section;
+    uint64_t count;
+    uint64_t *buff;
+    /* read section and count, and then store it in temp_buff */
+    uint8_t temp_buff[9];
+    void (*callback)(libxl__egc *, libxl__colo_save_state *);
+    bool svm_running;
+};
+
 /*----- Domain suspend (save) state structure -----*/
 
 typedef struct libxl__domain_suspend_state libxl__domain_suspend_state;
@@ -2782,7 +2801,12 @@ struct libxl__domain_suspend_state {
     libxl__domain_suspend_state2 dss2;
     int hvm;
     int xcflags;
-    libxl__remus_state rs;
+    union {
+        /* for Remus */
+        libxl__remus_state rs;
+        /* for COLO */
+        libxl__colo_save_state css;
+    };
     libxl__save_helper_state shs;
     libxl__logdirty_switch logdirty;
     /* private for libxl__domain_save_device_model */
diff --git a/tools/libxl/libxl_save_msgs_gen.pl b/tools/libxl/libxl_save_msgs_gen.pl
index 0239cac..fbb2d67 100755
--- a/tools/libxl/libxl_save_msgs_gen.pl
+++ b/tools/libxl/libxl_save_msgs_gen.pl
@@ -36,6 +36,7 @@ our @msgs = (
                                               'unsigned long', 'console_mfn'] ],
     [  9, 'srW',    "complete",              [qw(int retval
                                                  int errnoval)] ],
+    [ 10, 'scxAB',  "get_dirty_pfn", [] ],
 );
 
 #----------------------------------------
diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
index ea51d1a..599f137 100644
--- a/tools/libxl/libxl_types.idl
+++ b/tools/libxl/libxl_types.idl
@@ -602,6 +602,7 @@ libxl_domain_remus_info = Struct("domain_remus_info",[
     ("netbuf",       bool),
     ("netbufscript", string),
     ("diskbuf",      bool),
+    ("colo",         bool)
     ])
 
 libxl_event_type = Enumeration("event_type", [
-- 
1.9.3

  parent reply	other threads:[~2014-08-08  7:01 UTC|newest]

Thread overview: 64+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-08-08  7:00 [RFC Patch v2 00/45] COarse-grain LOck-stepping Virtual Machines for Non-stop Service Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 01/45] copy the correct page to memory Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 02/45] csum the correct page Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 03/45] don't zero out ioreq page Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 04/45] Refactor domain_suspend_callback_common() Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 05/45] Update libxl__domain_resume() for colo Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 06/45] Update libxl__domain_suspend_common_switch_qemu_logdirty() " Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 07/45] Introduce a new internal API libxl__domain_unpause() Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 08/45] Update libxl__domain_unpause() to support qemu-xen Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 09/45] support to resume uncooperative HVM guests Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 10/45] update datecopier to support sending data only Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 11/45] introduce a new API to aync read data from fd Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 12/45] move remus related codes to libxl_remus.c Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 13/45] rename remus device to checkpoint device Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 14/45] adjust the indentation Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 15/45] don't touch remus in checkpoint_device Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 16/45] Update libxl_save_msgs_gen.pl to support return data from xl to xc Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 17/45] Allow slave sends data to master Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 18/45] secondary vm suspend/resume/checkpoint code Wen Congyang
2014-08-08  7:01 ` Wen Congyang [this message]
2014-08-08  7:01 ` [RFC Patch v2 20/45] xc_domain_save: flush cache before calling callbacks->postcopy() in colo mode Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 21/45] COLO: xc related codes Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 22/45] send store mfn and console mfn to xl before resuming secondary vm Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 23/45] implement the cmdline for COLO Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 24/45] HACK: do checkpoint per 20ms Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 25/45] colo: dynamic allocate aio_requests to avoid -EBUSY error Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 26/45] fix memory leak in block-remus Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 27/45] pass uuid to the callback td_open Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 28/45] return the correct dev path Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 29/45] blktap2: use correct way to get remus_image Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 30/45] don't call client_flush() when switching to unprotected mode Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 31/45] remus: fix bug in tdremus_close() Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 32/45] blktap2: use correct way to get free event id Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 33/45] blktap2: don't return negative " Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 34/45] blktap2: use correct way to define array Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 35/45] blktap2: connect to backup asynchronously Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 36/45] switch to unprotected mode before closing Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 37/45] blktap2: move async connect related codes to block-replication.c Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 38/45] blktap2: move ramdisk " Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 39/45] block-colo: implement colo disk replication Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 40/45] pass correct file to qemu if we use blktap2 Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 41/45] support blktap remus in xl Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 42/45] support blktap colo in xl: Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 43/45] update libxl__device_disk_from_xs_be() to support blktap device Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 44/45] libxl/colo: setup and control disk replication for blktap2 backends Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 45/45] x86/hvm: Always set pending event injection when loading VMC[BS] state Wen Congyang
2014-08-08  7:24   ` Jan Beulich
2014-08-08  7:29     ` Wen Congyang
2014-08-26 16:02   ` Jan Beulich
2014-08-27  0:46     ` Wen Congyang
2014-08-27 14:58       ` Aravind Gopalakrishnan
2014-08-28  1:04         ` Wen Congyang
2014-08-28  8:54           ` Andrew Cooper
2014-08-28 11:17             ` Wen Congyang
2014-08-28 11:31               ` Paul Durrant
2014-08-29  5:59                 ` Wen Congyang
2014-08-28  9:53         ` Tim Deegan
2014-08-27 23:24     ` Tian, Kevin
2014-08-27 15:02   ` Andrew Cooper
2014-08-08  7:01 ` [RFC Patch v2 46/45] Introduce "xen-load-devices-state" Wen Congyang
2014-08-08  7:19 ` [RFC Patch v2 00/45] COarse-grain LOck-stepping Virtual Machines for Non-stop Service Jan Beulich
2014-08-08  7:39   ` Wen Congyang
2014-08-08  8:21   ` Wen Congyang
2014-08-08  9:02     ` Jan Beulich

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1407481305-19808-20-git-send-email-wency@cn.fujitsu.com \
    --to=wency@cn.fujitsu.com \
    --cc=Ian.Campbell@citrix.com \
    --cc=Ian.Jackson@eu.citrix.com \
    --cc=eddie.dong@intel.com \
    --cc=laijs@cn.fujitsu.com \
    --cc=xen-devel@lists.xen.org \
    --cc=yanghy@cn.fujitsu.com \
    --cc=yunhong.jiang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).