xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
From: Wen Congyang <wency@cn.fujitsu.com>
To: xen devel <xen-devel@lists.xen.org>
Cc: Ian Campbell <Ian.Campbell@citrix.com>,
	Wen Congyang <wency@cn.fujitsu.com>,
	Ian Jackson <Ian.Jackson@eu.citrix.com>,
	Jiang Yunhong <yunhong.jiang@intel.com>,
	Dong Eddie <eddie.dong@intel.com>,
	Yang Hongyang <yanghy@cn.fujitsu.com>,
	Lai Jiangshan <laijs@cn.fujitsu.com>
Subject: [RFC Patch v3 12/22] implement the cmdline for COLO
Date: Fri, 5 Sep 2014 17:25:47 +0800	[thread overview]
Message-ID: <1409909158-19243-13-git-send-email-wency@cn.fujitsu.com> (raw)
In-Reply-To: <1409909158-19243-1-git-send-email-wency@cn.fujitsu.com>

Add a new option -c to the command 'xl remus'. If you want
to use COLO HA instead of Remus HA, please use the -c option.

Update man pages to reflect the addition of a new option to
'xl remus' command.

Also add a new option -c to the internal command 'xl migrate-receive'.

Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
---
 docs/man/xl.pod.1         | 11 +++++++++--
 tools/libxl/libxl.c       |  6 ++++++
 tools/libxl/xl_cmdimpl.c  | 48 ++++++++++++++++++++++++++++++++++++++---------
 tools/libxl/xl_cmdtable.c |  3 ++-
 4 files changed, 56 insertions(+), 12 deletions(-)

diff --git a/docs/man/xl.pod.1 b/docs/man/xl.pod.1
index bce4bfe..297cd04 100644
--- a/docs/man/xl.pod.1
+++ b/docs/man/xl.pod.1
@@ -427,12 +427,15 @@ Print huge (!) amount of debug during the migration process.
 
 =item B<remus> [I<OPTIONS>] I<domain-id> I<host>
 
-Enable Remus HA for domain. By default B<xl> relies on ssh as a transport
-mechanism between the two hosts.
+Enable Remus HA or COLO HA for domain. By default B<xl> relies on ssh as a
+transport mechanism between the two hosts.
 
 N.B: Remus support in xl is still in experimental (proof-of-concept) phase.
      Disk replication support is limited to DRBD disks.
 
+     COLO support in xl is still in experimental (proof-of-concept) phase.
+     There is no support for network or disk at the moment.
+
 B<OPTIONS>
 
 =over 4
@@ -478,6 +481,10 @@ Disable network output buffering. Requires enabling unsafe mode.
 
 Disable disk replication. Requires enabling unsafe mode.
 
+=item B<-c>
+
+Enable COLO HA. It conflicts with B<-i> and B<-b>.
+
 =back
 
 =item B<pause> I<domain-id>
diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
index c86b988..39a1879 100644
--- a/tools/libxl/libxl.c
+++ b/tools/libxl/libxl.c
@@ -802,6 +802,12 @@ int libxl_domain_remus_start(libxl_ctx *ctx, libxl_domain_remus_info *info,
         goto out;
     }
 
+    /* The caller must set this defbool */
+    if (libxl_defbool_is_default(info->colo)) {
+        LOG(ERROR, "colo mode must be enabled/disabled");
+        goto out;
+    }
+
     libxl_defbool_setdefault(&info->unsafe, false);
     libxl_defbool_setdefault(&info->blackhole, false);
     libxl_defbool_setdefault(&info->compression, true);
diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c
index 8702e08..3709dd2 100644
--- a/tools/libxl/xl_cmdimpl.c
+++ b/tools/libxl/xl_cmdimpl.c
@@ -3791,6 +3791,9 @@ static void migrate_receive(int debug, int daemonize, int monitor,
     dom_info.send_fd = send_fd;
     dom_info.migration_domname_r = &migration_domname;
     dom_info.checkpointed_stream = remus;
+    if (remus == LIBXL_CHECKPOINTED_STREAM_COLO)
+        /* COLO uses stdout to send control message to master */
+        dom_info.quiet = 1;
 
     rc = create_domain(&dom_info);
     if (rc < 0) {
@@ -3805,7 +3808,8 @@ static void migrate_receive(int debug, int daemonize, int monitor,
         /* If we are here, it means that the sender (primary) has crashed.
          * TODO: Split-Brain Check.
          */
-        fprintf(stderr, "migration target: Remus Failover for domain %u\n",
+        fprintf(stderr, "migration target: %s Failover for domain %u\n",
+                remus == LIBXL_CHECKPOINTED_STREAM_COLO ? "COLO" : "Remus",
                 domid);
 
         /*
@@ -3822,15 +3826,21 @@ static void migrate_receive(int debug, int daemonize, int monitor,
             rc = libxl_domain_rename(ctx, domid, migration_domname,
                                      common_domname);
             if (rc)
-                fprintf(stderr, "migration target (Remus): "
+                fprintf(stderr, "migration target (%s): "
                         "Failed to rename domain from %s to %s:%d\n",
+                        remus == LIBXL_CHECKPOINTED_STREAM_COLO ? "COLO" : "Remus",
                         migration_domname, common_domname, rc);
         }
 
+        if (remus == LIBXL_CHECKPOINTED_STREAM_COLO)
+            /* The guest is running after failover in COLO mode */
+            exit(rc ? -ERROR_FAIL: 0);
+
         rc = libxl_domain_unpause(ctx, domid);
         if (rc)
-            fprintf(stderr, "migration target (Remus): "
+            fprintf(stderr, "migration target (%s): "
                     "Failed to unpause domain %s (id: %u):%d\n",
+                    remus == LIBXL_CHECKPOINTED_STREAM_COLO ? "COLO" : "Remus",
                     common_domname, domid, rc);
 
         exit(rc ? -ERROR_FAIL: 0);
@@ -3976,7 +3986,7 @@ int main_migrate_receive(int argc, char **argv)
     int debug = 0, daemonize = 1, monitor = 1, remus = 0;
     int opt;
 
-    SWITCH_FOREACH_OPT(opt, "Fedr", NULL, "migrate-receive", 0) {
+    SWITCH_FOREACH_OPT(opt, "Fedrc", NULL, "migrate-receive", 0) {
     case 'F':
         daemonize = 0;
         break;
@@ -3988,8 +3998,10 @@ int main_migrate_receive(int argc, char **argv)
         debug = 1;
         break;
     case 'r':
-        remus = 1;
+        remus = LIBXL_CHECKPOINTED_STREAM_REMUS;
         break;
+    case 'c':
+        remus = LIBXL_CHECKPOINTED_STREAM_COLO;
     }
 
     if (argc-optind != 0) {
@@ -7290,15 +7302,18 @@ int main_remus(int argc, char **argv)
     pid_t child = -1;
     uint8_t *config_data;
     int config_len;
+    int interval = 0;
 
     memset(&r_info, 0, sizeof(libxl_domain_remus_info));
     /* Defaults */
     r_info.interval = 200;
     libxl_defbool_setdefault(&r_info.blackhole, false);
+    libxl_defbool_setdefault(&r_info.colo, false);
 
-    SWITCH_FOREACH_OPT(opt, "Fbundi:s:N:e", NULL, "remus", 2) {
+    SWITCH_FOREACH_OPT(opt, "Fbundi:s:N:ec", NULL, "remus", 2) {
     case 'i':
         r_info.interval = atoi(optarg);
+        interval = 1;
         break;
     case 'F':
         libxl_defbool_set(&r_info.unsafe, true);
@@ -7324,11 +7339,23 @@ int main_remus(int argc, char **argv)
     case 'e':
         daemonize = 0;
         break;
+    case 'c':
+        libxl_defbool_set(&r_info.colo, true);
     }
 
     domid = find_domain(argv[optind]);
     host = argv[optind + 1];
 
+    if (libxl_defbool_val(r_info.colo)) {
+        if (!interval)
+            r_info.interval = 0;
+
+        if (r_info.interval || libxl_defbool_val(r_info.blackhole)) {
+            perror("option -c is conflict with -i or -b");
+            exit(-1);
+        }
+    }
+
     if (!r_info.netbufscript)
         r_info.netbufscript = default_remus_netbufscript;
 
@@ -7343,8 +7370,9 @@ int main_remus(int argc, char **argv)
         if (!ssh_command[0]) {
             rune = host;
         } else {
-            if (asprintf(&rune, "exec %s %s xl migrate-receive -r %s",
+            if (asprintf(&rune, "exec %s %s xl migrate-receive %s %s",
                          ssh_command, host,
+                         libxl_defbool_val(r_info.colo) ? "-c" : "-r",
                          daemonize ? "" : " -e") < 0)
                 return 1;
         }
@@ -7373,7 +7401,8 @@ int main_remus(int argc, char **argv)
      * domain to force failover
      */
     if (libxl_domain_info(ctx, 0, domid)) {
-        fprintf(stderr, "Remus: Primary domain has been destroyed.\n");
+        fprintf(stderr, "%s: Primary domain has been destroyed.\n",
+                libxl_defbool_val(r_info.colo) ? "COLO" : "Remus");
         close(send_fd);
         return 0;
     }
@@ -7385,7 +7414,8 @@ int main_remus(int argc, char **argv)
     if (rc == ERROR_GUEST_TIMEDOUT)
         fprintf(stderr, "Failed to suspend domain at primary.\n");
     else {
-        fprintf(stderr, "Remus: Backup failed? resuming domain at primary.\n");
+        fprintf(stderr, "%s: Backup failed? resuming domain at primary.\n",
+                libxl_defbool_val(r_info.colo) ? "COLO" : "Remus");
         libxl_domain_resume(ctx, domid, 1, 0);
     }
 
diff --git a/tools/libxl/xl_cmdtable.c b/tools/libxl/xl_cmdtable.c
index 6d4596b..22b63db 100644
--- a/tools/libxl/xl_cmdtable.c
+++ b/tools/libxl/xl_cmdtable.c
@@ -498,7 +498,8 @@ struct cmd_spec cmd_table[] = {
       "-b                      Replicate memory checkpoints to /dev/null (blackhole).\n"
       "                        Works only in unsafe mode.\n"
       "-n                      Disable network output buffering. Works only in unsafe mode.\n"
-      "-d                      Disable disk replication. Works only in unsafe mode."
+      "-d                      Disable disk replication. Works only in unsafe mode.\n"
+      "-c                      Enable COLO HA. It is conflict with -i and -b"
     },
 #endif
     { "devd",
-- 
1.9.3

  parent reply	other threads:[~2014-09-05  9:25 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-09-05  9:25 [RFC Patch v3 00/22] COarse-grain LOck-stepping Virtual Machines for Non-stop Service Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 01/22] move remus related codes to libxl_remus.c Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 02/22] rename remus device to checkpoint device Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 03/22] adjust the indentation Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 04/22] don't touch remus in checkpoint_device Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 05/22] Update libxl_save_msgs_gen.pl to support return data from xl to xc Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 06/22] Allow slave sends data to master Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 07/22] secondary vm suspend/resume/checkpoint code Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 08/22] primary vm suspend/get_dirty_pfn/resume/checkpoint code Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 09/22] xc_domain_save: flush cache before calling callbacks->postcopy() in colo mode Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 10/22] COLO: xc related codes Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 11/22] send store mfn and console mfn to xl before resuming secondary vm Wen Congyang
2014-09-05  9:25 ` Wen Congyang [this message]
2014-09-05  9:25 ` [RFC Patch v3 13/22] blktap2: connect to backup asynchronously Wen Congyang
2014-09-24 19:11   ` Shriram Rajagopalan
2014-09-25  5:40     ` Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 14/22] switch to unprotected mode before closing Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 15/22] blktap2: move async connect related codes to block-replication.c Wen Congyang
2014-09-24 18:48   ` Shriram Rajagopalan
2014-09-05  9:25 ` [RFC Patch v3 16/22] blktap2: move ramdisk " Wen Congyang
2014-09-24 18:44   ` Shriram Rajagopalan
2014-09-26  5:18     ` Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 17/22] block-colo: implement colo disk replication Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 18/22] support blktap COLO in xl: Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 19/22] libxl/colo: setup and control disk replication for blktap2 backends Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 20/22] setup and control colo-agent for primary vm Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 21/22] setup and control colo-agent for secondary vm Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 22/22] colo: cmdline switches and config vars to control colo-agent Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 23/22] Introduce "xen-load-devices-state" Wen Congyang
2014-09-05 21:57   ` Stefano Stabellini
     [not found]   ` <alpine.DEB.2.02.1409052229550.2334@kaball.uk.xensource.com>
2014-09-09  2:47     ` Wen Congyang
     [not found]     ` <540E6A44.8090507@cn.fujitsu.com>
2014-09-10 19:15       ` Stefano Stabellini
     [not found]       ` <alpine.DEB.2.02.1409102005450.8137@kaball.uk.xensource.com>
2014-09-11  5:03         ` Wen Congyang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1409909158-19243-13-git-send-email-wency@cn.fujitsu.com \
    --to=wency@cn.fujitsu.com \
    --cc=Ian.Campbell@citrix.com \
    --cc=Ian.Jackson@eu.citrix.com \
    --cc=eddie.dong@intel.com \
    --cc=laijs@cn.fujitsu.com \
    --cc=xen-devel@lists.xen.org \
    --cc=yanghy@cn.fujitsu.com \
    --cc=yunhong.jiang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).