From: Wen Congyang <wency@cn.fujitsu.com>
To: xen devel <xen-devel@lists.xen.org>
Cc: Ian Campbell <Ian.Campbell@citrix.com>,
Wen Congyang <wency@cn.fujitsu.com>,
Ian Jackson <Ian.Jackson@eu.citrix.com>,
Jiang Yunhong <yunhong.jiang@intel.com>,
Dong Eddie <eddie.dong@intel.com>,
Yang Hongyang <yanghy@cn.fujitsu.com>,
Lai Jiangshan <laijs@cn.fujitsu.com>
Subject: [RFC Patch v2 21/45] COLO: xc related codes
Date: Fri, 8 Aug 2014 15:01:20 +0800 [thread overview]
Message-ID: <1407481305-19808-22-git-send-email-wency@cn.fujitsu.com> (raw)
In-Reply-To: <1407481305-19808-1-git-send-email-wency@cn.fujitsu.com>
Save:
1. send XC_SAVE_ID_LAST_CHECKPOINT, so the secondary vm can be resumed
2. call callbacks->get_dirty_pfn() after suspending the primary vm if we
are doing a checkpoint.
Restore:
1. call the callbacks resume/checkpoint/suspend if the secondary vm's
status is the same as the primary vm's status.
2. zero out tdata because we will use it to zero out pagebuf.tdata.
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
---
tools/libxc/xc_domain_restore.c | 44 ++++++++++++++++++++++++++++++++--
tools/libxc/xc_domain_save.c | 52 +++++++++++++++++++++++++++++++++++++++--
2 files changed, 92 insertions(+), 4 deletions(-)
diff --git a/tools/libxc/xc_domain_restore.c b/tools/libxc/xc_domain_restore.c
index 2d6139c..fe188f4 100644
--- a/tools/libxc/xc_domain_restore.c
+++ b/tools/libxc/xc_domain_restore.c
@@ -1454,7 +1454,7 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
int nraces = 0;
/* The new domain's shared-info frame number. */
- unsigned long shared_info_frame;
+ unsigned long shared_info_frame = 0;
unsigned char shared_info_page[PAGE_SIZE]; /* saved contents from file */
shared_info_any_t *old_shared_info =
(shared_info_any_t *)shared_info_page;
@@ -1504,6 +1504,8 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
DPRINTF("%s: starting restore of new domid %u", __func__, dom);
+ n = m = 0;
+
pagebuf_init(&pagebuf);
memset(&tailbuf, 0, sizeof(tailbuf));
tailbuf.ishvm = hvm;
@@ -1629,7 +1631,6 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
* We uncanonicalise page tables as we go.
*/
- n = m = 0;
loadpages:
for ( ; ; )
{
@@ -1793,6 +1794,7 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
goto finish;
}
+new_checkpoint:
// DPRINTF("Buffered checkpoint\n");
if ( pagebuf_get(xch, ctx, &pagebuf, io_fd, dom) ) {
@@ -2292,6 +2294,7 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
free(tdata.data);
goto out;
}
+ memset(&tdata, 0, sizeof(tdata));
}
/* Dump the QEMU state to a state file for QEMU to load */
@@ -2357,6 +2360,43 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
rc = 0;
out:
+ if ( !rc && callbacks->checkpoint )
+ {
+#define HANDLE_CALLBACK_RETURN_VALUE(frc) \
+ do { \
+ if ( frc == 0 ) \
+ { \
+ /* Some internal error happens */ \
+ rc = 1; \
+ goto out; \
+ } \
+ else if ( frc == 2 ) \
+ { \
+ /* Reading/writing error, do failover */ \
+ rc = 0; \
+ goto failover; \
+ } \
+ } while (0)
+ /* COLO */
+
+ /* TODO: call restore_results */
+
+ /* Resume secondary vm */
+ frc = callbacks->postcopy(callbacks->data);
+ HANDLE_CALLBACK_RETURN_VALUE(frc);
+
+ /* wait for new checkpoint */
+ frc = callbacks->checkpoint(callbacks->data);
+ HANDLE_CALLBACK_RETURN_VALUE(frc);
+
+ /* suspend secondary vm */
+ frc = callbacks->suspend(callbacks->data);
+ HANDLE_CALLBACK_RETURN_VALUE(frc);
+
+ goto new_checkpoint;
+ }
+
+failover:
if ( (rc != 0) && (dom != 0) )
xc_domain_destroy(xch, dom);
xc_hypercall_buffer_free(xch, ctxt);
diff --git a/tools/libxc/xc_domain_save.c b/tools/libxc/xc_domain_save.c
index 61caa47..79cc2c8 100644
--- a/tools/libxc/xc_domain_save.c
+++ b/tools/libxc/xc_domain_save.c
@@ -377,6 +377,31 @@ static int suspend_and_state(int (*suspend)(void*), void* data,
return 0;
}
+/*
+ * Fold the dirty pfns reported by get_dirty_pfn() into the to_send bitmap.
+ *
+ * The pointer returned by get_dirty_pfn(data) is read here as an array of
+ * uint64_t: element 0 holds the number of entries and elements 1..count hold
+ * the pfns.  The buffer is released with free() before returning, on success
+ * and on failure alike.
+ *
+ * Returns 0 on success.  Returns -1 with errno set to EINVAL if the callback
+ * returns NULL or if a reported pfn is not below p2m_size; bits for the pfns
+ * preceding the bad one may already have been set in that case.
+ */
+static int update_dirty_bitmap(uint8_t *(*get_dirty_pfn)(void *), void *data,
+                               unsigned long p2m_size, unsigned long *to_send)
+{
+    uint64_t *pfn_list;
+    uint64_t count, i;
+    int rc = 0;
+
+    pfn_list = (uint64_t *)get_dirty_pfn(data);
+    if ( !pfn_list )
+    {
+        /* Report the failure to the caller instead of asserting. */
+        errno = EINVAL;
+        return -1;
+    }
+
+    count = pfn_list[0];
+    for ( i = 0; i < count; i++ )
+    {
+        uint64_t pfn = pfn_list[i + 1];
+
+        /*
+         * Valid pfns are 0 .. p2m_size - 1.  NOTE(review): this assumes
+         * to_send holds exactly p2m_size bits - confirm against the
+         * allocation in xc_domain_save().
+         */
+        if ( pfn >= p2m_size )
+        {
+            rc = -1;
+            break;
+        }
+
+        set_bit(pfn, to_send);
+    }
+
+    /* Release the list on every path, including the out-of-range case. */
+    free(pfn_list);
+
+    /* Set errno after free() so that free() cannot clobber it. */
+    if ( rc )
+        errno = EINVAL;
+
+    return rc;
+}
+
/*
** Map the top-level page of MFNs from the guest. The guest might not have
** finished resuming from a previous restore operation, so we wait a while for
@@ -1769,11 +1794,14 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iter
free(buf);
}
- if ( !callbacks->checkpoint )
+ if ( !callbacks->checkpoint || callbacks->get_dirty_pfn )
{
/*
* If this is not a checkpointed save then this must be the first and
* last checkpoint.
+ *
+ * If we are in colo mode, send last checkpoint to resume secondary
+ * vm.
*/
i = XC_SAVE_ID_LAST_CHECKPOINT;
if ( wrexact(io_fd, &i, sizeof(int)) )
@@ -2119,7 +2147,14 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iter
* primary vm and secondary vm now.
*/
if ( !rc && callbacks->postcopy && callbacks->get_dirty_pfn )
- callbacks->postcopy(callbacks->data);
+ {
+ if ( !callbacks->postcopy(callbacks->data) )
+ {
+ ERROR("postcopy fails");
+ /* postcopy may be implemented in libxl, no way to get errno */
+ rc = -1;
+ }
+ }
/* Enable compression now, finally */
compressing = (flags & XCFLAGS_CHECKPOINT_COMPRESS);
@@ -2136,8 +2171,11 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iter
io_fd, dom, &info) )
{
ERROR("Domain appears not to have suspended");
+ /* postcopy may be implemented in libxl, no way to get errno */
+ errno = -1;
goto out;
}
+
DPRINTF("SUSPEND shinfo %08lx\n", info.shared_info_frame);
print_stats(xch, dom, 0, &time_stats, &shadow_stats, 1);
@@ -2148,6 +2186,16 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iter
PERROR("Error flushing shadow PT");
}
+ if ( callbacks->get_dirty_pfn )
+ {
+ if ( update_dirty_bitmap(callbacks->get_dirty_pfn, callbacks->data,
+ dinfo->p2m_size, to_send) )
+ {
+ ERROR("getting secondary vm's dirty pages failed");
+ goto out;
+ }
+ }
+
goto copypages;
}
--
1.9.3
next prev parent reply other threads:[~2014-08-08 7:01 UTC|newest]
Thread overview: 64+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-08-08 7:00 [RFC Patch v2 00/45] COarse-grain LOck-stepping Virtual Machines for Non-stop Service Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 01/45] copy the correct page to memory Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 02/45] csum the correct page Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 03/45] don't zero out ioreq page Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 04/45] Refactor domain_suspend_callback_common() Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 05/45] Update libxl__domain_resume() for colo Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 06/45] Update libxl__domain_suspend_common_switch_qemu_logdirty() " Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 07/45] Introduce a new internal API libxl__domain_unpause() Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 08/45] Update libxl__domain_unpause() to support qemu-xen Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 09/45] support to resume uncooperative HVM guests Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 10/45] update datecopier to support sending data only Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 11/45] introduce a new API to aync read data from fd Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 12/45] move remus related codes to libxl_remus.c Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 13/45] rename remus device to checkpoint device Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 14/45] adjust the indentation Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 15/45] don't touch remus in checkpoint_device Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 16/45] Update libxl_save_msgs_gen.pl to support return data from xl to xc Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 17/45] Allow slave sends data to master Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 18/45] secondary vm suspend/resume/checkpoint code Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 19/45] primary vm suspend/get_dirty_pfn/resume/checkpoint code Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 20/45] xc_domain_save: flush cache before calling callbacks->postcopy() in colo mode Wen Congyang
2014-08-08 7:01 ` Wen Congyang [this message]
2014-08-08 7:01 ` [RFC Patch v2 22/45] send store mfn and console mfn to xl before resuming secondary vm Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 23/45] implement the cmdline for COLO Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 24/45] HACK: do checkpoint per 20ms Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 25/45] colo: dynamic allocate aio_requests to avoid -EBUSY error Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 26/45] fix memory leak in block-remus Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 27/45] pass uuid to the callback td_open Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 28/45] return the correct dev path Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 29/45] blktap2: use correct way to get remus_image Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 30/45] don't call client_flush() when switching to unprotected mode Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 31/45] remus: fix bug in tdremus_close() Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 32/45] blktap2: use correct way to get free event id Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 33/45] blktap2: don't return negative " Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 34/45] blktap2: use correct way to define array Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 35/45] blktap2: connect to backup asynchronously Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 36/45] switch to unprotected mode before closing Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 37/45] blktap2: move async connect related codes to block-replication.c Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 38/45] blktap2: move ramdisk " Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 39/45] block-colo: implement colo disk replication Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 40/45] pass correct file to qemu if we use blktap2 Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 41/45] support blktap remus in xl Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 42/45] support blktap colo in xl: Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 43/45] update libxl__device_disk_from_xs_be() to support blktap device Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 44/45] libxl/colo: setup and control disk replication for blktap2 backends Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 45/45] x86/hvm: Always set pending event injection when loading VMC[BS] state Wen Congyang
2014-08-08 7:24 ` Jan Beulich
2014-08-08 7:29 ` Wen Congyang
2014-08-26 16:02 ` Jan Beulich
2014-08-27 0:46 ` Wen Congyang
2014-08-27 14:58 ` Aravind Gopalakrishnan
2014-08-28 1:04 ` Wen Congyang
2014-08-28 8:54 ` Andrew Cooper
2014-08-28 11:17 ` Wen Congyang
2014-08-28 11:31 ` Paul Durrant
2014-08-29 5:59 ` Wen Congyang
2014-08-28 9:53 ` Tim Deegan
2014-08-27 23:24 ` Tian, Kevin
2014-08-27 15:02 ` Andrew Cooper
2014-08-08 7:01 ` [RFC Patch v2 46/45] Introduce "xen-load-devices-state" Wen Congyang
2014-08-08 7:19 ` [RFC Patch v2 00/45] COarse-grain LOck-stepping Virtual Machines for Non-stop Service Jan Beulich
2014-08-08 7:39 ` Wen Congyang
2014-08-08 8:21 ` Wen Congyang
2014-08-08 9:02 ` Jan Beulich
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1407481305-19808-22-git-send-email-wency@cn.fujitsu.com \
--to=wency@cn.fujitsu.com \
--cc=Ian.Campbell@citrix.com \
--cc=Ian.Jackson@eu.citrix.com \
--cc=eddie.dong@intel.com \
--cc=laijs@cn.fujitsu.com \
--cc=xen-devel@lists.xen.org \
--cc=yanghy@cn.fujitsu.com \
--cc=yunhong.jiang@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).