[RFC Patch v3 10/22] COLO: xc related codes

xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed

From: Wen Congyang <wency@cn.fujitsu.com>
To: xen devel <xen-devel@lists.xen.org>
Cc: Ian Campbell <Ian.Campbell@citrix.com>,
	Wen Congyang <wency@cn.fujitsu.com>,
	Ian Jackson <Ian.Jackson@eu.citrix.com>,
	Jiang Yunhong <yunhong.jiang@intel.com>,
	Dong Eddie <eddie.dong@intel.com>,
	Yang Hongyang <yanghy@cn.fujitsu.com>,
	Lai Jiangshan <laijs@cn.fujitsu.com>
Subject: [RFC Patch v3 10/22] COLO: xc related codes
Date: Fri, 5 Sep 2014 17:25:45 +0800	[thread overview]
Message-ID: <1409909158-19243-11-git-send-email-wency@cn.fujitsu.com> (raw)
In-Reply-To: <1409909158-19243-1-git-send-email-wency@cn.fujitsu.com>

Save:
1. send XC_SAVE_ID_LAST_CHECKPOINT, so secondary vm can be resumed
2. call callbacks->get_dirty_pfn() after suspend primary vm if we
   are doing checkpoint.

Restore:
1. call the callbacks resume/checkpoint/suspend if secondary vm's
   status is the same as primary vm's status.
2. zero out tdata because we will use it to zero out pagebuf.tdata.

Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
---
 tools/libxc/xc_domain_restore.c | 44 ++++++++++++++++++++++++++++++++--
 tools/libxc/xc_domain_save.c    | 52 +++++++++++++++++++++++++++++++++++++++--
 2 files changed, 92 insertions(+), 4 deletions(-)

diff --git a/tools/libxc/xc_domain_restore.c b/tools/libxc/xc_domain_restore.c
index 21a6177..f7f683a 100644
--- a/tools/libxc/xc_domain_restore.c
+++ b/tools/libxc/xc_domain_restore.c
@@ -1454,7 +1454,7 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
     int nraces = 0;
 
     /* The new domain's shared-info frame number. */
-    unsigned long shared_info_frame;
+    unsigned long shared_info_frame = 0;
     unsigned char shared_info_page[PAGE_SIZE]; /* saved contents from file */
     shared_info_any_t *old_shared_info = 
         (shared_info_any_t *)shared_info_page;
@@ -1504,6 +1504,8 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
 
     DPRINTF("%s: starting restore of new domid %u", __func__, dom);
 
+    n = m = 0;
+
     pagebuf_init(&pagebuf);
     memset(&tailbuf, 0, sizeof(tailbuf));
     tailbuf.ishvm = hvm;
@@ -1629,7 +1631,6 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
      * We uncanonicalise page tables as we go.
      */
 
-    n = m = 0;
  loadpages:
     for ( ; ; )
     {
@@ -1793,6 +1794,7 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
         goto finish;
     }
 
+new_checkpoint:
     // DPRINTF("Buffered checkpoint\n");
 
     if ( pagebuf_get(xch, ctx, &pagebuf, io_fd, dom) ) {
@@ -2301,6 +2303,7 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
             free(tdata.data);
             goto out;
         }
+        memset(&tdata, 0, sizeof(tdata));
     }
 
     /* Dump the QEMU state to a state file for QEMU to load */
@@ -2367,6 +2370,43 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
     rc = 0;
 
  out:
+    if ( !rc && callbacks->checkpoint )
+    {
+#define HANDLE_CALLBACK_RETURN_VALUE(frc)                   \
+    do {                                                    \
+        if ( frc == 0 )                                     \
+        {                                                   \
+            /* Some internal error happens */               \
+            rc = 1;                                         \
+            goto out;                                       \
+        }                                                   \
+        else if ( frc == 2 )                                \
+        {                                                   \
+            /* Reading/writing error, do failover */        \
+            rc = 0;                                         \
+            goto failover;                                  \
+        }                                                   \
+    } while (0)
+        /* COLO */
+
+        /* TODO: call restore_results */
+
+        /* Resume secondary vm */
+        frc = callbacks->postcopy(callbacks->data);
+        HANDLE_CALLBACK_RETURN_VALUE(frc);
+
+        /* wait for new checkpoint */
+        frc = callbacks->checkpoint(callbacks->data);
+        HANDLE_CALLBACK_RETURN_VALUE(frc);
+
+        /* suspend secondary vm */
+        frc = callbacks->suspend(callbacks->data);
+        HANDLE_CALLBACK_RETURN_VALUE(frc);
+
+        goto new_checkpoint;
+    }
+
+failover:
     if ( (rc != 0) && (dom != 0) )
         xc_domain_destroy(xch, dom);
     xc_hypercall_buffer_free(xch, ctxt);
diff --git a/tools/libxc/xc_domain_save.c b/tools/libxc/xc_domain_save.c
index 61caa47..79cc2c8 100644
--- a/tools/libxc/xc_domain_save.c
+++ b/tools/libxc/xc_domain_save.c
@@ -377,6 +377,31 @@ static int suspend_and_state(int (*suspend)(void*), void* data,
     return 0;
 }
 
+/* Mark every pfn reported by get_dirty_pfn(data) in to_send, then free the
+ * list.  Element 0 of the list is the entry count.  Returns 0, or -1 with
+ * errno set to EINVAL when a reported pfn is not below p2m_size. */
+static int update_dirty_bitmap(uint8_t *(*get_dirty_pfn)(void *), void *data,
+                               unsigned long p2m_size, unsigned long *to_send)
+{
+    uint64_t *pfn_list = (uint64_t *)get_dirty_pfn(data);
+    uint64_t count, i;
+    int rc = 0;
+
+    assert(pfn_list);
+    count = pfn_list[0];
+    for (i = 0; i < count && !rc; i++) {
+        if (pfn_list[i + 1] < p2m_size)  /* pfn == p2m_size is out of range */
+            set_bit(pfn_list[i + 1], to_send);
+        else
+            rc = -1;
+    }
+
+    free(pfn_list);             /* released on the error path too */
+    if (rc)
+        errno = EINVAL;         /* set after free(), which may alter errno */
+    return rc;
+}
+
 /*
 ** Map the top-level page of MFNs from the guest. The guest might not have
 ** finished resuming from a previous restore operation, so we wait a while for
@@ -1769,11 +1794,14 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iter
         free(buf);
     }
 
-    if ( !callbacks->checkpoint )
+    if ( !callbacks->checkpoint || callbacks->get_dirty_pfn )
     {
         /*
          * If this is not a checkpointed save then this must be the first and
          * last checkpoint.
+         *
+         * If we are in colo mode, send last checkpoint to resume secondary
+         * vm.
          */
         i = XC_SAVE_ID_LAST_CHECKPOINT;
         if ( wrexact(io_fd, &i, sizeof(int)) )
@@ -2119,7 +2147,14 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iter
      * primary vm and secondary vm now.
      */
     if ( !rc && callbacks->postcopy && callbacks->get_dirty_pfn )
-        callbacks->postcopy(callbacks->data);
+    {
+        if ( !callbacks->postcopy(callbacks->data) )
+        {
+            ERROR("postcopy fails");
+            /* postcopy may be implemented in libxl, no way to get errno */
+            rc = -1;
+        }
+    }
 
     /* Enable compression now, finally */
     compressing = (flags & XCFLAGS_CHECKPOINT_COMPRESS);
@@ -2136,8 +2171,11 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iter
                                io_fd, dom, &info) )
         {
             ERROR("Domain appears not to have suspended");
+            /* postcopy may be implemented in libxl, no way to get errno */
+            errno = -1;
             goto out;
         }
+
         DPRINTF("SUSPEND shinfo %08lx\n", info.shared_info_frame);
         print_stats(xch, dom, 0, &time_stats, &shadow_stats, 1);
 
@@ -2148,6 +2186,16 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iter
             PERROR("Error flushing shadow PT");
         }
 
+        if ( callbacks->get_dirty_pfn )
+        {
+            if ( update_dirty_bitmap(callbacks->get_dirty_pfn, callbacks->data,
+                                     dinfo->p2m_size, to_send) )
+            {
+                ERROR("getting secondary vm's dirty pages failed");
+                goto out;
+            }
+        }
+
         goto copypages;
     }
 
-- 
1.9.3

next prev parent reply	other threads:[~2014-09-05  9:25 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-09-05  9:25 [RFC Patch v3 00/22] COarse-grain LOck-stepping Virtual Machines for Non-stop Service Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 01/22] move remus related codes to libxl_remus.c Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 02/22] rename remus device to checkpoint device Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 03/22] adjust the indentation Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 04/22] don't touch remus in checkpoint_device Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 05/22] Update libxl_save_msgs_gen.pl to support return data from xl to xc Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 06/22] Allow slave sends data to master Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 07/22] secondary vm suspend/resume/checkpoint code Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 08/22] primary vm suspend/get_dirty_pfn/resume/checkpoint code Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 09/22] xc_domain_save: flush cache before calling callbacks->postcopy() in colo mode Wen Congyang
2014-09-05  9:25 ` Wen Congyang [this message]
2014-09-05  9:25 ` [RFC Patch v3 11/22] send store mfn and console mfn to xl before resuming secondary vm Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 12/22] implement the cmdline for COLO Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 13/22] blktap2: connect to backup asynchronously Wen Congyang
2014-09-24 19:11   ` Shriram Rajagopalan
2014-09-25  5:40     ` Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 14/22] switch to unprotected mode before closing Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 15/22] blktap2: move async connect related codes to block-replication.c Wen Congyang
2014-09-24 18:48   ` Shriram Rajagopalan
2014-09-05  9:25 ` [RFC Patch v3 16/22] blktap2: move ramdisk " Wen Congyang
2014-09-24 18:44   ` Shriram Rajagopalan
2014-09-26  5:18     ` Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 17/22] block-colo: implement colo disk replication Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 18/22] support blktap COLO in xl: Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 19/22] libxl/colo: setup and control disk replication for blktap2 backends Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 20/22] setup and control colo-agent for primary vm Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 21/22] setup and control colo-agent for secondary vm Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 22/22] colo: cmdline switches and config vars to control colo-agent Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 23/22] Introduce "xen-load-devices-state" Wen Congyang
2014-09-05 21:57   ` Stefano Stabellini
     [not found]   ` <alpine.DEB.2.02.1409052229550.2334@kaball.uk.xensource.com>
2014-09-09  2:47     ` Wen Congyang
     [not found]     ` <540E6A44.8090507@cn.fujitsu.com>
2014-09-10 19:15       ` Stefano Stabellini
     [not found]       ` <alpine.DEB.2.02.1409102005450.8137@kaball.uk.xensource.com>
2014-09-11  5:03         ` Wen Congyang

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:21a6177 dfblob:f7f683a dfblob:61caa47 dfblob:79cc2c8 )
 OR (
bs:"[RFC Patch v3 10/22] COLO: xc related codes" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1409909158-19243-11-git-send-email-wency@cn.fujitsu.com \
    --to=wency@cn.fujitsu.com \
    --cc=Ian.Campbell@citrix.com \
    --cc=Ian.Jackson@eu.citrix.com \
    --cc=eddie.dong@intel.com \
    --cc=laijs@cn.fujitsu.com \
    --cc=xen-devel@lists.xen.org \
    --cc=yanghy@cn.fujitsu.com \
    --cc=yunhong.jiang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).