From: Wen Congyang <wency@cn.fujitsu.com>
To: xen devel <xen-devel@lists.xen.org>
Cc: Ian Campbell <Ian.Campbell@citrix.com>,
Wen Congyang <wency@cn.fujitsu.com>,
Ian Jackson <Ian.Jackson@eu.citrix.com>,
Jiang Yunhong <yunhong.jiang@intel.com>,
Dong Eddie <eddie.dong@intel.com>,
Yang Hongyang <yanghy@cn.fujitsu.com>,
Lai Jiangshan <laijs@cn.fujitsu.com>
Subject: [RFC Patch v2 23/45] implement the cmdline for COLO
Date: Fri, 8 Aug 2014 15:01:22 +0800 [thread overview]
Message-ID: <1407481305-19808-24-git-send-email-wency@cn.fujitsu.com> (raw)
In-Reply-To: <1407481305-19808-1-git-send-email-wency@cn.fujitsu.com>
Add a new option -c to the command 'xl remus'. If you want
to use COLO HA instead of Remus HA, please use -c option.
Update man pages to reflect the addition of a new option to
'xl remus' command.
Also add a new option -c to the internal command 'xl migrate-receive'.
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
---
docs/man/xl.pod.1 | 11 +++++++++--
tools/libxl/xl_cmdimpl.c | 47 ++++++++++++++++++++++++++++++++++++++---------
tools/libxl/xl_cmdtable.c | 3 ++-
3 files changed, 49 insertions(+), 12 deletions(-)
diff --git a/docs/man/xl.pod.1 b/docs/man/xl.pod.1
index bce4bfe..297cd04 100644
--- a/docs/man/xl.pod.1
+++ b/docs/man/xl.pod.1
@@ -427,12 +427,15 @@ Print huge (!) amount of debug during the migration process.
=item B<remus> [I<OPTIONS>] I<domain-id> I<host>
-Enable Remus HA for domain. By default B<xl> relies on ssh as a transport
-mechanism between the two hosts.
+Enable Remus HA or COLO HA for domain. By default B<xl> relies on ssh as a
+transport mechanism between the two hosts.
N.B: Remus support in xl is still in experimental (proof-of-concept) phase.
Disk replication support is limited to DRBD disks.
+ COLO support in xl is still in experimental (proof-of-concept) phase.
+ There is no support for network or disk at the moment.
+
B<OPTIONS>
=over 4
@@ -478,6 +481,10 @@ Disable network output buffering. Requires enabling unsafe mode.
Disable disk replication. Requires enabling unsafe mode.
+=item B<-c>
+
+Enable COLO HA. It is conflict with B<-i> and B<-b>.
+
=back
=item B<pause> I<domain-id>
diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c
index ccb46ab..1110d53 100644
--- a/tools/libxl/xl_cmdimpl.c
+++ b/tools/libxl/xl_cmdimpl.c
@@ -3754,6 +3754,9 @@ static void migrate_receive(int debug, int daemonize, int monitor,
dom_info.send_fd = send_fd;
dom_info.migration_domname_r = &migration_domname;
dom_info.checkpointed_stream = remus;
+ if (remus == LIBXL_CHECKPOINTED_STREAM_COLO)
+ /* COLO uses stdout to send control message to master */
+ dom_info.quiet = 1;
rc = create_domain(&dom_info);
if (rc < 0) {
@@ -3768,7 +3771,8 @@ static void migrate_receive(int debug, int daemonize, int monitor,
/* If we are here, it means that the sender (primary) has crashed.
* TODO: Split-Brain Check.
*/
- fprintf(stderr, "migration target: Remus Failover for domain %u\n",
+ fprintf(stderr, "migration target: %s Failover for domain %u\n",
+ remus == LIBXL_CHECKPOINTED_STREAM_COLO ? "COLO" : "Remus",
domid);
/*
@@ -3785,15 +3789,21 @@ static void migrate_receive(int debug, int daemonize, int monitor,
rc = libxl_domain_rename(ctx, domid, migration_domname,
common_domname);
if (rc)
- fprintf(stderr, "migration target (Remus): "
+ fprintf(stderr, "migration target (%s): "
"Failed to rename domain from %s to %s:%d\n",
+ remus == LIBXL_CHECKPOINTED_STREAM_COLO ? "COLO" : "Remus",
migration_domname, common_domname, rc);
}
+ if (remus == LIBXL_CHECKPOINTED_STREAM_COLO)
+ /* The guest is running after failover in COLO mode */
+ exit(rc ? -ERROR_FAIL: 0);
+
rc = libxl_domain_unpause(ctx, domid);
if (rc)
- fprintf(stderr, "migration target (Remus): "
+ fprintf(stderr, "migration target (%s): "
"Failed to unpause domain %s (id: %u):%d\n",
+ remus == LIBXL_CHECKPOINTED_STREAM_COLO ? "COLO" : "Remus",
common_domname, domid, rc);
exit(rc ? -ERROR_FAIL: 0);
@@ -3939,7 +3949,7 @@ int main_migrate_receive(int argc, char **argv)
int debug = 0, daemonize = 1, monitor = 1, remus = 0;
int opt;
- SWITCH_FOREACH_OPT(opt, "Fedr", NULL, "migrate-receive", 0) {
+ SWITCH_FOREACH_OPT(opt, "Fedrc", NULL, "migrate-receive", 0) {
case 'F':
daemonize = 0;
break;
@@ -3951,8 +3961,10 @@ int main_migrate_receive(int argc, char **argv)
debug = 1;
break;
case 'r':
- remus = 1;
+ remus = LIBXL_CHECKPOINTED_STREAM_REMUS;
break;
+ case 'c':
+ remus = LIBXL_CHECKPOINTED_STREAM_COLO;
}
if (argc-optind != 0) {
@@ -7253,6 +7265,7 @@ int main_remus(int argc, char **argv)
pid_t child = -1;
uint8_t *config_data;
int config_len;
+ int interval = 0;
memset(&r_info, 0, sizeof(libxl_domain_remus_info));
/* Defaults */
@@ -7263,9 +7276,10 @@ int main_remus(int argc, char **argv)
r_info.netbuf = 1;
r_info.diskbuf = 1;
- SWITCH_FOREACH_OPT(opt, "Fbundi:s:N:e", NULL, "remus", 2) {
+ SWITCH_FOREACH_OPT(opt, "Fbundi:s:N:ec", NULL, "remus", 2) {
case 'i':
r_info.interval = atoi(optarg);
+ interval = 1;
break;
case 'F':
r_info.unsafe = 1;
@@ -7291,6 +7305,8 @@ int main_remus(int argc, char **argv)
case 'e':
daemonize = 0;
break;
+ case 'c':
+ r_info.colo = 1;
}
if (!r_info.unsafe &&
@@ -7303,6 +7319,16 @@ int main_remus(int argc, char **argv)
domid = find_domain(argv[optind]);
host = argv[optind + 1];
+ if (r_info.colo) {
+ if (!interval)
+ r_info.interval = 0;
+
+ if (r_info.interval + r_info.blackhole > 0) {
+ perror("option c is conflict with i or b");
+ exit(-1);
+ }
+ }
+
if (!r_info.netbufscript)
r_info.netbufscript = default_remus_netbufscript;
@@ -7317,8 +7343,9 @@ int main_remus(int argc, char **argv)
if (!ssh_command[0]) {
rune = host;
} else {
- if (asprintf(&rune, "exec %s %s xl migrate-receive -r %s",
+ if (asprintf(&rune, "exec %s %s xl migrate-receive %s %s",
ssh_command, host,
+ r_info.colo ? "-c" : "-r",
daemonize ? "" : " -e") < 0)
return 1;
}
@@ -7347,7 +7374,8 @@ int main_remus(int argc, char **argv)
* domain to force failover
*/
if (libxl_domain_info(ctx, 0, domid)) {
- fprintf(stderr, "Remus: Primary domain has been destroyed.\n");
+ fprintf(stderr, "%s: Primary domain has been destroyed.\n",
+ r_info.colo ? "COLO" : "Remus");
close(send_fd);
return 0;
}
@@ -7359,7 +7387,8 @@ int main_remus(int argc, char **argv)
if (rc == ERROR_GUEST_TIMEDOUT)
fprintf(stderr, "Failed to suspend domain at primary.\n");
else {
- fprintf(stderr, "Remus: Backup failed? resuming domain at primary.\n");
+ fprintf(stderr, "%s: Backup failed? resuming domain at primary.\n",
+ r_info.colo ? "COLO" : "Remus");
libxl_domain_resume(ctx, domid, 1, 0);
}
diff --git a/tools/libxl/xl_cmdtable.c b/tools/libxl/xl_cmdtable.c
index 6d4596b..22b63db 100644
--- a/tools/libxl/xl_cmdtable.c
+++ b/tools/libxl/xl_cmdtable.c
@@ -498,7 +498,8 @@ struct cmd_spec cmd_table[] = {
"-b Replicate memory checkpoints to /dev/null (blackhole).\n"
" Works only in unsafe mode.\n"
"-n Disable network output buffering. Works only in unsafe mode.\n"
- "-d Disable disk replication. Works only in unsafe mode."
+ "-d Disable disk replication. Works only in unsafe mode.\n"
+ "-c Enable COLO HA. It is conflict with -i and -b"
},
#endif
{ "devd",
--
1.9.3
next prev parent reply other threads:[~2014-08-08 7:01 UTC|newest]
Thread overview: 64+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-08-08 7:00 [RFC Patch v2 00/45] COarse-grain LOck-stepping Virtual Machines for Non-stop Service Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 01/45] copy the correct page to memory Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 02/45] csum the correct page Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 03/45] don't zero out ioreq page Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 04/45] Refactor domain_suspend_callback_common() Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 05/45] Update libxl__domain_resume() for colo Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 06/45] Update libxl__domain_suspend_common_switch_qemu_logdirty() " Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 07/45] Introduce a new internal API libxl__domain_unpause() Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 08/45] Update libxl__domain_unpause() to support qemu-xen Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 09/45] support to resume uncooperative HVM guests Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 10/45] update datecopier to support sending data only Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 11/45] introduce a new API to aync read data from fd Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 12/45] move remus related codes to libxl_remus.c Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 13/45] rename remus device to checkpoint device Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 14/45] adjust the indentation Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 15/45] don't touch remus in checkpoint_device Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 16/45] Update libxl_save_msgs_gen.pl to support return data from xl to xc Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 17/45] Allow slave sends data to master Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 18/45] secondary vm suspend/resume/checkpoint code Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 19/45] primary vm suspend/get_dirty_pfn/resume/checkpoint code Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 20/45] xc_domain_save: flush cache before calling callbacks->postcopy() in colo mode Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 21/45] COLO: xc related codes Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 22/45] send store mfn and console mfn to xl before resuming secondary vm Wen Congyang
2014-08-08 7:01 ` Wen Congyang [this message]
2014-08-08 7:01 ` [RFC Patch v2 24/45] HACK: do checkpoint per 20ms Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 25/45] colo: dynamic allocate aio_requests to avoid -EBUSY error Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 26/45] fix memory leak in block-remus Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 27/45] pass uuid to the callback td_open Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 28/45] return the correct dev path Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 29/45] blktap2: use correct way to get remus_image Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 30/45] don't call client_flush() when switching to unprotected mode Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 31/45] remus: fix bug in tdremus_close() Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 32/45] blktap2: use correct way to get free event id Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 33/45] blktap2: don't return negative " Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 34/45] blktap2: use correct way to define array Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 35/45] blktap2: connect to backup asynchronously Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 36/45] switch to unprotected mode before closing Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 37/45] blktap2: move async connect related codes to block-replication.c Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 38/45] blktap2: move ramdisk " Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 39/45] block-colo: implement colo disk replication Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 40/45] pass correct file to qemu if we use blktap2 Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 41/45] support blktap remus in xl Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 42/45] support blktap colo in xl: Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 43/45] update libxl__device_disk_from_xs_be() to support blktap device Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 44/45] libxl/colo: setup and control disk replication for blktap2 backends Wen Congyang
2014-08-08 7:01 ` [RFC Patch v2 45/45] x86/hvm: Always set pending event injection when loading VMC[BS] state Wen Congyang
2014-08-08 7:24 ` Jan Beulich
2014-08-08 7:29 ` Wen Congyang
2014-08-26 16:02 ` Jan Beulich
2014-08-27 0:46 ` Wen Congyang
2014-08-27 14:58 ` Aravind Gopalakrishnan
2014-08-28 1:04 ` Wen Congyang
2014-08-28 8:54 ` Andrew Cooper
2014-08-28 11:17 ` Wen Congyang
2014-08-28 11:31 ` Paul Durrant
2014-08-29 5:59 ` Wen Congyang
2014-08-28 9:53 ` Tim Deegan
2014-08-27 23:24 ` Tian, Kevin
2014-08-27 15:02 ` Andrew Cooper
2014-08-08 7:01 ` [RFC Patch v2 46/45] Introduce "xen-load-devices-state" Wen Congyang
2014-08-08 7:19 ` [RFC Patch v2 00/45] COarse-grain LOck-stepping Virtual Machines for Non-stop Service Jan Beulich
2014-08-08 7:39 ` Wen Congyang
2014-08-08 8:21 ` Wen Congyang
2014-08-08 9:02 ` Jan Beulich
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1407481305-19808-24-git-send-email-wency@cn.fujitsu.com \
--to=wency@cn.fujitsu.com \
--cc=Ian.Campbell@citrix.com \
--cc=Ian.Jackson@eu.citrix.com \
--cc=eddie.dong@intel.com \
--cc=laijs@cn.fujitsu.com \
--cc=xen-devel@lists.xen.org \
--cc=yanghy@cn.fujitsu.com \
--cc=yunhong.jiang@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).