From: Wen Congyang <wency@cn.fujitsu.com>
To: Dong Eddie <eddie.dong@intel.com>,
Lai Jiangshan <laijs@cn.fujitsu.com>,
xen-devel <xen-devel@lists.xen.org>,
Shriram Rajagopalan <rshriram@cs.ubc.ca>
Cc: Jiang Yunhong <yunhong.jiang@intel.com>,
Wen Congyang <wency@cn.fujitsu.com>,
Ye Wei <wei.ye1987@gmail.com>, Xu Yao <xuyao.xu@huawei.com>,
Hong Tao <bobby.hong@huawei.com>
Subject: [RFC Patch v2 10/16] colo: implement restore_callbacks finish_restore()
Date: Thu, 11 Jul 2013 16:35:42 +0800 [thread overview]
Message-ID: <1373531748-12547-11-git-send-email-wency@cn.fujitsu.com> (raw)
In-Reply-To: <1373531748-12547-1-git-send-email-wency@cn.fujitsu.com>
This patch implements the restore callback finish_restore() for colo:
1. finish_restore():
   xc_restore is run from XendCheckpoint.py, and the two sides communicate
   like this (an illustrative sketch follows below):
   a. write "finish\n" to stdout when we are ready to resume the vm
   b. XendCheckpoint.py writes "resume" back once the vm has been resumed
   c. write "resume" to the master when postresume is done
   d. read "continue" from the master when a new checkpoint begins
   e. write "suspend" to the master when the vm has been suspended
   f. read "start" from the master when the primary begins to transfer
      dirty pages
The SVM runs in colo mode, so we have to suspend it to synchronize the
state and then resume it. p2m_frame_list_list needs to be fixed up before
the SVM is resumed, and its contents are cached right after the SVM is
suspended.
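For illustration only, here is a minimal, self-contained sketch of the token
exchange described above, as seen from the restore side. The helper names
(read_token(), expect(), one_checkpoint()) and the io_fd parameter are made up
for this sketch and are not part of the patch; the real implementation is
colo_finish_restore() in xc_domain_restore_colo.c below.

#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Read exactly len bytes into buf and NUL-terminate it; -1 on error/EOF. */
static int read_token(int fd, char *buf, size_t len)
{
    size_t done = 0;

    while (done < len) {
        ssize_t n = read(fd, buf + done, len - done);
        if (n <= 0)
            return -1;
        done += n;
    }
    buf[len] = '\0';
    return 0;
}

/* Expect a fixed token on fd; -1 if something else (or nothing) arrives. */
static int expect(int fd, const char *token)
{
    char buf[16];

    if (read_token(fd, buf, strlen(token)) < 0 || strcmp(buf, token))
        return -1;
    return 0;
}

/*
 * One checkpoint round.  io_fd is the connection to the master; stdin and
 * stdout talk to XendCheckpoint.py.  Return values mirror the patch:
 * -1 on error, 0 to start the guest, 1 to do another checkpoint.
 */
static int one_checkpoint(int io_fd)
{
    printf("finish\n");                      /* a. ready to resume the vm       */
    fflush(stdout);
    if (expect(STDIN_FILENO, "resume"))      /* b. XendCheckpoint.py resumed it */
        return -1;
    if (write(io_fd, "resume", 6) != 6)      /* c. postresume is done           */
        return -1;
    if (expect(io_fd, "continue"))           /* d. a new checkpoint begins;     */
        return 0;                            /*    otherwise start the guest    */
    /* ... suspend the SVM via the suspend event channel here ... */
    if (write(io_fd, "suspend", 7) != 7)     /* e. the vm has been suspended    */
        return -1;
    return expect(io_fd, "start") ? -1 : 1;  /* f. primary sends dirty pages    */
}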
Signed-off-by: Ye Wei <wei.ye1987@gmail.com>
Signed-off-by: Jiang Yunhong <yunhong.jiang@intel.com>
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
---
tools/libxc/Makefile | 6 +-
tools/libxc/xc_domain_restore_colo.c | 335 ++++++++++++++++++++++++++++++++++
tools/libxc/xc_save_restore_colo.h | 1 +
tools/libxl/Makefile | 2 +-
tools/xcutils/Makefile | 4 +-
5 files changed, 342 insertions(+), 6 deletions(-)
diff --git a/tools/libxc/Makefile b/tools/libxc/Makefile
index 70994b9..92d11af 100644
--- a/tools/libxc/Makefile
+++ b/tools/libxc/Makefile
@@ -49,7 +49,7 @@ GUEST_SRCS-y += xc_nomigrate.c
endif
vpath %.c ../../xen/common/libelf
-CFLAGS += -I../../xen/common/libelf
+CFLAGS += -I../../xen/common/libelf -I../xenstore
ELF_SRCS-y += libelf-tools.c libelf-loader.c
ELF_SRCS-y += libelf-dominfo.c
@@ -199,8 +199,8 @@ xc_dom_bzimageloader.o: CFLAGS += $(call zlib-options,D)
xc_dom_bzimageloader.opic: CFLAGS += $(call zlib-options,D)
libxenguest.so.$(MAJOR).$(MINOR): COMPRESSION_LIBS = $(call zlib-options,l)
-libxenguest.so.$(MAJOR).$(MINOR): $(GUEST_PIC_OBJS) libxenctrl.so
- $(CC) $(LDFLAGS) -Wl,$(SONAME_LDFLAG) -Wl,libxenguest.so.$(MAJOR) $(SHLIB_LDFLAGS) -o $@ $(GUEST_PIC_OBJS) $(COMPRESSION_LIBS) -lz $(LDLIBS_libxenctrl) $(PTHREAD_LIBS) $(APPEND_LDFLAGS)
+libxenguest.so.$(MAJOR).$(MINOR): $(GUEST_PIC_OBJS) libxenctrl.so $(LDLIBS_libxenstore)
+ $(CC) $(LDFLAGS) -Wl,$(SONAME_LDFLAG) -Wl,libxenguest.so.$(MAJOR) $(SHLIB_LDFLAGS) -o $@ $(GUEST_PIC_OBJS) $(COMPRESSION_LIBS) -lz $(LDLIBS_libxenctrl) $(PTHREAD_LIBS) $(LDLIBS_libxenstore) $(APPEND_LDFLAGS)
xenctrl_osdep_ENOSYS.so: $(OSDEP_PIC_OBJS) libxenctrl.so
$(CC) -g $(LDFLAGS) $(SHLIB_LDFLAGS) -o $@ $(OSDEP_PIC_OBJS) $(LDLIBS_libxenctrl) $(APPEND_LDFLAGS)
diff --git a/tools/libxc/xc_domain_restore_colo.c b/tools/libxc/xc_domain_restore_colo.c
index 70cdd16..6b87a2d 100644
--- a/tools/libxc/xc_domain_restore_colo.c
+++ b/tools/libxc/xc_domain_restore_colo.c
@@ -2,6 +2,7 @@
#include <sys/types.h>
#include <sys/wait.h>
#include <xc_bitops.h>
+#include <xenstore.h>
struct restore_colo_data
{
@@ -602,3 +603,337 @@ int colo_update_p2m_table(struct restore_data *comm_data, void *data)
return 0;
}
+
+static int update_pfn_type(xc_interface *xch, uint32_t dom, int count, xen_pfn_t *pfn_batch,
+ xen_pfn_t *pfn_type_batch, xen_pfn_t *pfn_type)
+{
+ unsigned long k;
+
+ if (xc_get_pfn_type_batch(xch, dom, count, pfn_type_batch))
+ {
+ ERROR("xc_get_pfn_type_batch for slaver failed");
+ return -1;
+ }
+
+ for (k = 0; k < count; k++)
+ pfn_type[pfn_batch[k]] = pfn_type_batch[k] & XEN_DOMCTL_PFINFO_LTAB_MASK;
+
+ return 0;
+}
+
+static int install_fw_network(struct restore_data *comm_data)
+{
+ pid_t pid;
+ xc_interface *xch = comm_data->xch;
+ int status;
+ int rc;
+
+ char vif[20];
+
+ snprintf(vif, sizeof(vif), "vif%u.0", comm_data->dom);
+
+ pid = vfork();
+ if (pid < 0) {
+ ERROR("vfork fails");
+ return -1;
+ }
+
+ if (pid > 0) {
+ rc = waitpid(pid, &status, 0);
+ if (rc != pid || !WIFEXITED(status) || WEXITSTATUS(status) != 0) {
+ ERROR("getting child status fails");
+ return -1;
+ }
+
+ return 0;
+ }
+
+ execl("/etc/xen/scripts/network-colo", "network-colo", "slaver", "install", vif, "eth0", NULL);
+ ERROR("execl fails");
+ return -1;
+}
+
+static int get_p2m_list(struct restore_data *comm_data,
+ struct restore_colo_data *colo_data,
+ xen_pfn_t *p2m_fll,
+ xen_pfn_t **p2m_frame_list_list_p,
+ char **p2m_frame_list_p,
+ int prot)
+{
+ struct domain_info_context *dinfo = comm_data->dinfo;
+ xc_interface *xch = comm_data->xch;
+ uint32_t dom = comm_data->dom;
+ shared_info_t *shinfo = NULL;
+ xc_dominfo_t info;
+ xen_pfn_t *p2m_frame_list_list = NULL;
+ char *p2m_frame_list = NULL;
+ int rc = -1;
+
+ if ( xc_domain_getinfo(xch, dom, 1, &info) != 1 )
+ {
+ ERROR("Could not get domain info");
+ return -1;
+ }
+
+ /* Map the shared info frame */
+ shinfo = xc_map_foreign_range(xch, dom, PAGE_SIZE,
+ prot,
+ info.shared_info_frame);
+ if ( shinfo == NULL )
+ {
+ ERROR("Couldn't map shared info");
+ return -1;
+ }
+
+ if (p2m_fll == NULL)
+ shinfo->arch.pfn_to_mfn_frame_list_list = colo_data->p2m_fll;
+ else
+ *p2m_fll = shinfo->arch.pfn_to_mfn_frame_list_list;
+
+ p2m_frame_list_list =
+ xc_map_foreign_range(xch, dom, PAGE_SIZE, prot,
+ shinfo->arch.pfn_to_mfn_frame_list_list);
+ if ( p2m_frame_list_list == NULL )
+ {
+ ERROR("Couldn't map p2m_frame_list_list");
+ goto error;
+ }
+
+ p2m_frame_list = xc_map_foreign_pages(xch, dom, prot,
+ p2m_frame_list_list,
+ P2M_FLL_ENTRIES);
+ if ( p2m_frame_list == NULL )
+ {
+ ERROR("Couldn't map p2m_frame_list");
+ goto error;
+ }
+
+ *p2m_frame_list_list_p = p2m_frame_list_list;
+ *p2m_frame_list_p = p2m_frame_list;
+ rc = 0;
+
+error:
+ munmap(shinfo, PAGE_SIZE);
+ if (rc && p2m_frame_list_list)
+ munmap(p2m_frame_list_list, PAGE_SIZE);
+
+ return rc;
+}
+
+static int update_p2m_list(struct restore_data *comm_data,
+ struct restore_colo_data *colo_data)
+{
+ struct domain_info_context *dinfo = comm_data->dinfo;
+ xen_pfn_t *p2m_frame_list_list = NULL;
+ char *p2m_frame_list = NULL;
+ int rc;
+
+ rc = get_p2m_list(comm_data, colo_data, NULL, &p2m_frame_list_list,
+ &p2m_frame_list, PROT_READ | PROT_WRITE);
+ if (rc)
+ return rc;
+
+ memcpy(p2m_frame_list_list, colo_data->p2m_frame_list_list, PAGE_SIZE);
+ memcpy(p2m_frame_list, colo_data->p2m_frame_list, PAGE_SIZE * P2M_FLL_ENTRIES);
+
+ munmap(p2m_frame_list_list, PAGE_SIZE);
+ munmap(p2m_frame_list, PAGE_SIZE * P2M_FLL_ENTRIES);
+
+ return 0;
+}
+
+static int cache_p2m_list(struct restore_data *comm_data,
+ struct restore_colo_data *colo_data)
+{
+ struct domain_info_context *dinfo = comm_data->dinfo;
+ xen_pfn_t *p2m_frame_list_list = NULL;
+ char *p2m_frame_list = NULL;
+ int rc;
+
+ rc = get_p2m_list(comm_data, colo_data, &colo_data->p2m_fll,
+ &p2m_frame_list_list, &p2m_frame_list, PROT_READ);
+ if (rc)
+ return rc;
+
+ memcpy(colo_data->p2m_frame_list_list, p2m_frame_list_list, PAGE_SIZE);
+ memcpy(colo_data->p2m_frame_list, p2m_frame_list, PAGE_SIZE * P2M_FLL_ENTRIES);
+
+ munmap(p2m_frame_list_list, PAGE_SIZE);
+ munmap(p2m_frame_list, PAGE_SIZE * P2M_FLL_ENTRIES);
+
+ return 0;
+}
+
+/* We are ready to start the guest when this function is called.  We do
+ * not return until we need to do a new checkpoint or some error occurs.
+ *
+ * communication with python and master
+ *  python code          restore code         master        comment
+ *                                     <===   "continue"    a new checkpoint begins
+ *                       "suspend"     ===>                 SVM is suspended
+ *                                     <===   "start"       getting dirty pages begins
+ *  <=== "finish\n"                                         SVM is ready
+ *  "resume" ===>                                           SVM is resumed
+ *                       "resume"      ===>                 postresume is done
+ *
+ * return value:
+ * -1: error
+ * 0: continue to start vm
+ * 1: continue to do a checkpoint
+ */
+int colo_finish_restore(struct restore_data *comm_data, void *data)
+{
+ struct restore_colo_data *colo_data = data;
+ xc_interface *xch = comm_data->xch;
+ uint32_t dom = comm_data->dom;
+ struct domain_info_context *dinfo = comm_data->dinfo;
+ xc_evtchn *xce = colo_data->xce;
+ unsigned long *pfn_batch_slaver = colo_data->pfn_batch_slaver;
+ unsigned long *pfn_type_batch_slaver = colo_data->pfn_type_batch_slaver;
+ unsigned long *pfn_type_slaver = colo_data->pfn_type_slaver;
+
+ unsigned long i, j;
+ int rc;
+ char str[10];
+ int remote_port;
+ int local_port = colo_data->local_port;
+
+ /* fix pfn_to_mfn_frame_list_list */
+ if (!colo_data->first_time)
+ {
+ if (update_p2m_list(comm_data, colo_data) < 0)
+ return -1;
+ }
+
+ /* output the store-mfn & console-mfn */
+ printf("store-mfn %li\n", comm_data->store_mfn);
+ printf("console-mfn %li\n", comm_data->console_mfn);
+
+ /* notify python code checkpoint finish */
+ printf("finish\n");
+ fflush(stdout);
+
+ /* we need to know which pages are dirty to restore the guest */
+ if (xc_shadow_control(xch, dom, XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY,
+ NULL, 0, NULL, 0, NULL) < 0 )
+ {
+ ERROR("enabling logdirty fails");
+ return -1;
+ }
+
+ /* wait domain resume, then connect the suspend evtchn */
+ read_exact(0, str, 6);
+ str[6] = '\0';
+ if (strcmp(str, "resume"))
+ {
+ ERROR("read %s, expect resume", str);
+ return -1;
+ }
+
+ if (colo_data->first_time) {
+ if (install_fw_network(comm_data) < 0)
+ return -1;
+ }
+
+ /* notify master vm is resumed */
+ write_exact(comm_data->io_fd, "resume", 6);
+
+ if (colo_data->first_time) {
+ sleep(10);
+ remote_port = xs_suspend_evtchn_port(dom);
+ if (remote_port < 0) {
+ ERROR("getting remote suspend port fails");
+ return -1;
+ }
+
+ local_port = xc_suspend_evtchn_init(xch, xce, dom, remote_port);
+ if (local_port < 0) {
+ ERROR("initializing suspend evtchn fails");
+ return -1;
+ }
+
+ colo_data->local_port = local_port;
+ }
+
+ /* wait for the next checkpoint */
+ read_exact(comm_data->io_fd, str, 8);
+ str[8] = '\0';
+ if (strcmp(str, "continue"))
+ {
+ ERROR("wait for a new checkpoint fails");
+ /* start the guest now? */
+ return 0;
+ }
+
+ /* notify the suspend evtchn */
+ rc = xc_evtchn_notify(xce, local_port);
+ if (rc < 0)
+ {
+ ERROR("notifying the suspend evtchn fails");
+ return -1;
+ }
+
+ rc = xc_await_suspend(xch, xce, local_port);
+ if (rc < 0)
+ {
+ ERROR("waiting suspend fails");
+ return -1;
+ }
+
+ /* notify master suspend is done */
+ write_exact(comm_data->io_fd, "suspend", 7);
+ read_exact(comm_data->io_fd, str, 5);
+ str[5] = '\0';
+ if (strcmp(str, "start"))
+ return -1;
+
+ if (xc_shadow_control(xch, dom, XEN_DOMCTL_SHADOW_OP_CLEAN,
+ HYPERCALL_BUFFER(dirty_pages), dinfo->p2m_size,
+ NULL, 0, NULL) != dinfo->p2m_size)
+ {
+ ERROR("getting slaver dirty fails");
+ return -1;
+ }
+
+ if (xc_shadow_control(xch, dom, XEN_DOMCTL_SHADOW_OP_OFF, NULL, 0, NULL,
+ 0, NULL) < 0 )
+ {
+ ERROR("disabling dirty-log fails");
+ return -1;
+ }
+
+ j = 0;
+ for (i = 0; i < colo_data->max_mem_pfn; i++)
+ {
+ if ( !test_bit(i, colo_data->dirty_pages) )
+ continue;
+
+ pfn_batch_slaver[j] = i;
+ pfn_type_batch_slaver[j++] = comm_data->p2m[i];
+ if (j == MAX_BATCH_SIZE)
+ {
+ if (update_pfn_type(xch, dom, j, pfn_batch_slaver,
+ pfn_type_batch_slaver, pfn_type_slaver))
+ {
+ return -1;
+ }
+ j = 0;
+ }
+ }
+
+ if (j)
+ {
+ if (update_pfn_type(xch, dom, j, pfn_batch_slaver,
+ pfn_type_batch_slaver, pfn_type_slaver))
+ {
+ return -1;
+ }
+ }
+
+ if (cache_p2m_list(comm_data, colo_data) < 0)
+ return -1;
+
+ colo_data->first_time = 0;
+
+ return 1;
+}
diff --git a/tools/libxc/xc_save_restore_colo.h b/tools/libxc/xc_save_restore_colo.h
index 98e5128..57df750 100644
--- a/tools/libxc/xc_save_restore_colo.h
+++ b/tools/libxc/xc_save_restore_colo.h
@@ -9,5 +9,6 @@ extern void colo_free(struct restore_data *, void *);
extern char *colo_get_page(struct restore_data *, void *, unsigned long);
extern int colo_flush_memory(struct restore_data *, void *);
extern int colo_update_p2m_table(struct restore_data *, void *);
+extern int colo_finish_restore(struct restore_data *, void *);
#endif
diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile
index cf214bb..36b924d 100644
--- a/tools/libxl/Makefile
+++ b/tools/libxl/Makefile
@@ -192,7 +192,7 @@ xl: $(XL_OBJS) libxlutil.so libxenlight.so
$(CC) $(LDFLAGS) -o $@ $(XL_OBJS) libxlutil.so $(LDLIBS_libxenlight) $(LDLIBS_libxenctrl) -lyajl $(APPEND_LDFLAGS)
libxl-save-helper: $(SAVE_HELPER_OBJS) libxenlight.so
- $(CC) $(LDFLAGS) -o $@ $(SAVE_HELPER_OBJS) $(LDLIBS_libxenctrl) $(LDLIBS_libxenguest) $(APPEND_LDFLAGS)
+ $(CC) $(LDFLAGS) -o $@ $(SAVE_HELPER_OBJS) $(LDLIBS_libxenctrl) $(LDLIBS_libxenguest) $(LDLIBS_libxenstore) $(APPEND_LDFLAGS)
testidl: testidl.o libxlutil.so libxenlight.so
$(CC) $(LDFLAGS) -o $@ testidl.o libxlutil.so $(LDLIBS_libxenlight) $(LDLIBS_libxenctrl) $(APPEND_LDFLAGS)
diff --git a/tools/xcutils/Makefile b/tools/xcutils/Makefile
index 6c502f1..51f3f0e 100644
--- a/tools/xcutils/Makefile
+++ b/tools/xcutils/Makefile
@@ -27,13 +27,13 @@ all: build
build: $(PROGRAMS)
xc_restore: xc_restore.o
- $(CC) $(LDFLAGS) $^ -o $@ $(LDLIBS_libxenctrl) $(LDLIBS_libxenguest) $(APPEND_LDFLAGS)
+ $(CC) $(LDFLAGS) $^ -o $@ $(LDLIBS_libxenctrl) $(LDLIBS_libxenguest) $(LDLIBS_libxenstore) $(APPEND_LDFLAGS)
xc_save: xc_save.o
$(CC) $(LDFLAGS) $^ -o $@ $(LDLIBS_libxenctrl) $(LDLIBS_libxenguest) $(LDLIBS_libxenstore) $(APPEND_LDFLAGS)
readnotes: readnotes.o
- $(CC) $(LDFLAGS) $^ -o $@ $(LDLIBS_libxenctrl) $(LDLIBS_libxenguest) $(APPEND_LDFLAGS)
+ $(CC) $(LDFLAGS) $^ -o $@ $(LDLIBS_libxenctrl) $(LDLIBS_libxenguest) $(LDLIBS_libxenstore) $(APPEND_LDFLAGS)
lsevtchn: lsevtchn.o
$(CC) $(LDFLAGS) $^ -o $@ $(LDLIBS_libxenctrl) $(APPEND_LDFLAGS)
--
1.7.4
Thread overview: 30+ messages
2013-07-11 8:35 [RFC Patch v2 00/16] COarse-grain LOck-stepping Virtual Machines for Non-stop Service Wen Congyang
2013-07-11 8:35 ` [RFC Patch v2 01/16] xen: introduce new hypercall to reset vcpu Wen Congyang
2013-07-11 9:44 ` Andrew Cooper
2013-07-11 9:58 ` Wen Congyang
2013-07-11 10:01 ` Ian Campbell
2013-08-01 11:48 ` Tim Deegan
2013-08-06 6:47 ` Wen Congyang
2013-07-11 8:35 ` [RFC Patch v2 02/16] block-remus: introduce colo mode Wen Congyang
2013-07-11 8:35 ` [RFC Patch v2 03/16] block-remus: introduce a interface to allow the user specify which mode the backup end uses Wen Congyang
2013-07-11 8:35 ` [RFC Patch v2 04/16] dominfo.completeRestore() will be called more than once in colo mode Wen Congyang
2013-07-11 8:35 ` [RFC Patch v2 05/16] xc_domain_restore: introduce restore_callbacks for colo Wen Congyang
2013-07-11 8:35 ` [RFC Patch v2 06/16] colo: implement restore_callbacks init()/free() Wen Congyang
2013-07-11 8:35 ` [RFC Patch v2 07/16] colo: implement restore_callbacks get_page() Wen Congyang
2013-07-11 8:35 ` [RFC Patch v2 08/16] colo: implement restore_callbacks flush_memory Wen Congyang
2013-07-11 8:35 ` [RFC Patch v2 09/16] colo: implement restore_callbacks update_p2m() Wen Congyang
2013-07-11 8:35 ` Wen Congyang [this message]
2013-07-11 9:40 ` [RFC Patch v2 10/16] colo: implement restore_callbacks finish_restore() Ian Campbell
2013-07-11 9:54 ` Wen Congyang
2013-07-11 8:35 ` [RFC Patch v2 11/16] xc_restore: implement for colo Wen Congyang
2013-07-11 8:35 ` [RFC Patch v2 12/16] XendCheckpoint: implement colo Wen Congyang
2013-07-11 8:35 ` [RFC Patch v2 13/16] xc_domain_save: flush cache before calling callbacks->postcopy() Wen Congyang
2013-07-11 13:43 ` Andrew Cooper
2013-07-12 1:36 ` Wen Congyang
2013-07-11 8:35 ` [RFC Patch v2 14/16] add callback to configure network for colo Wen Congyang
2013-07-11 8:35 ` [RFC Patch v2 15/16] xc_domain_save: implement save_callbacks " Wen Congyang
2013-07-11 13:52 ` Andrew Cooper
2013-07-11 8:35 ` [RFC Patch v2 16/16] remus: implement colo mode Wen Congyang
2013-07-11 9:37 ` [RFC Patch v2 00/16] COarse-grain LOck-stepping Virtual Machines for Non-stop Service Andrew Cooper
2013-07-11 9:40 ` Ian Campbell
2013-07-14 14:33 ` Shriram Rajagopalan