* [PATCH 0 of 3] Remus - libxl support
@ 2012-01-23 22:46 rshriram
2012-01-23 22:46 ` [PATCH 1 of 3] tools/libxl: QEMU device model suspend/resume rshriram
` (2 more replies)
0 siblings, 3 replies; 13+ messages in thread
From: rshriram @ 2012-01-23 22:46 UTC (permalink / raw)
To: xen-devel; +Cc: brendan, ian.jackson
This patch series introduces a basic support framework to
incorporate Remus into the libxl toolstack. The only functionality
currently implemented is memory checkpointing.
Shriram
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH 1 of 3] tools/libxl: QEMU device model suspend/resume
2012-01-23 22:46 [PATCH 0 of 3] Remus - libxl support rshriram
@ 2012-01-23 22:46 ` rshriram
2012-01-25 11:08 ` Ian Campbell
2012-01-23 22:46 ` [PATCH 2 of 3] tools/libxl: suspend/postflush/commit callbacks rshriram
2012-01-23 22:46 ` [PATCH 3 of 3] tools/libxl: xl remus/remus-receive commands rshriram
2 siblings, 1 reply; 13+ messages in thread
From: rshriram @ 2012-01-23 22:46 UTC (permalink / raw)
To: xen-devel; +Cc: brendan, ian.jackson
# HG changeset patch
# User Shriram Rajagopalan <rshriram@cs.ubc.ca>
# Date 1327358638 28800
# Node ID 11fb1dfda7de4d6759dec87d80cd16cf137f7369
# Parent 80fdf2182bc62ca358ba2f1a3513b47a4f8d9dfd
tools/libxl: QEMU device model suspend/resume
* Refactor the libxl__domain_save_device_model.
* Add support for suspend/resume QEMU device model
(both traditional xen and QMP).
Signed-off-by: Shriram Rajagopalan <rshriram@cs.ubc.ca>
diff -r 80fdf2182bc6 -r 11fb1dfda7de tools/libxl/libxl.c
--- a/tools/libxl/libxl.c Sat Jan 21 17:15:40 2012 +0000
+++ b/tools/libxl/libxl.c Mon Jan 23 14:43:58 2012 -0800
@@ -477,7 +477,7 @@
rc = libxl__domain_suspend_common(gc, domid, fd, type, live, debug);
if (!rc && type == LIBXL_DOMAIN_TYPE_HVM)
- rc = libxl__domain_save_device_model(gc, domid, fd);
+ rc = libxl__domain_save_device_model(gc, domid, fd, /* No Remus */ 0);
GC_FREE;
return rc;
}
diff -r 80fdf2182bc6 -r 11fb1dfda7de tools/libxl/libxl_dom.c
--- a/tools/libxl/libxl_dom.c Sat Jan 21 17:15:40 2012 +0000
+++ b/tools/libxl/libxl_dom.c Mon Jan 23 14:43:58 2012 -0800
@@ -534,6 +534,53 @@
return 0;
}
+static int libxl__remus_domain_suspend_qemu(libxl__gc *gc, uint32_t domid)
+{
+
+ switch (libxl__device_model_version_running(gc, domid)) {
+ case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL: {
+ char *path = NULL;
+ path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/command",
+ domid);
+ libxl__xs_write(gc, XBT_NULL, path, "save");
+ libxl__wait_for_device_model(gc, domid, "paused", NULL, NULL, NULL);
+ break;
+ }
+ case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN:
+ /* Stop QEMU */
+ if (libxl__qmp_stop(gc, domid))
+ return ERROR_FAIL;
+ break;
+ default:
+ return ERROR_INVAL;
+ }
+
+ return 0;
+}
+
+static int libxl__remus_domain_resume_qemu(libxl__gc *gc, uint32_t domid)
+{
+
+ switch (libxl__device_model_version_running(gc, domid)) {
+ case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL: {
+ char *path = NULL;
+ path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/command",
+ domid);
+ libxl__xs_write(gc, XBT_NULL, path, "continue");
+ break;
+ }
+ case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN:
+ /* Stop QEMU */
+ if (libxl__qmp_resume(gc, domid))
+ return ERROR_FAIL;
+ break;
+ default:
+ return ERROR_INVAL;
+ }
+
+ return 0;
+}
+
int libxl__domain_suspend_common(libxl__gc *gc, uint32_t domid, int fd,
libxl_domain_type type,
int live, int debug)
@@ -620,7 +667,7 @@
return rc;
}
-int libxl__domain_save_device_model(libxl__gc *gc, uint32_t domid, int fd)
+int libxl__domain_save_device_model(libxl__gc *gc, uint32_t domid, int fd, int remus)
{
libxl_ctx *ctx = libxl__gc_owner(gc);
int ret, fd2 = -1, c;
@@ -631,13 +678,12 @@
switch (libxl__device_model_version_running(gc, domid)) {
case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL: {
- char *path = NULL;
- LIBXL__LOG(ctx, LIBXL__LOG_DEBUG,
- "Saving device model state to %s", filename);
- path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/command",
- domid);
- libxl__xs_write(gc, XBT_NULL, path, "save");
- libxl__wait_for_device_model(gc, domid, "paused", NULL, NULL, NULL);
+ /* For Remus,we issue suspend_qemu call separately */
+ if (!remus) {
+ c = libxl__remus_domain_suspend_qemu(gc, domid);
+ if (c)
+ return c;
+ }
break;
}
case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN:
@@ -668,8 +714,9 @@
qemu_state_len = st.st_size;
LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "Qemu state is %d bytes\n", qemu_state_len);
- ret = libxl_write_exactly(ctx, fd, QEMU_SIGNATURE, strlen(QEMU_SIGNATURE),
- "saved-state file", "qemu signature");
+ ret = libxl_write_exactly(ctx, fd, remus ? REMUS_QEMU_SIGNATURE : QEMU_SIGNATURE,
+ remus ? strlen(REMUS_QEMU_SIGNATURE): strlen(QEMU_SIGNATURE),
+ "saved-state file", "qemu signature");
if (ret)
goto out;
diff -r 80fdf2182bc6 -r 11fb1dfda7de tools/libxl/libxl_internal.h
--- a/tools/libxl/libxl_internal.h Sat Jan 21 17:15:40 2012 +0000
+++ b/tools/libxl/libxl_internal.h Mon Jan 23 14:43:58 2012 -0800
@@ -73,6 +73,7 @@
#define LIBXL_HVM_EXTRA_MEMORY 2048
#define LIBXL_MIN_DOM0_MEM (128*1024)
#define QEMU_SIGNATURE "DeviceModelRecord0002"
+#define REMUS_QEMU_SIGNATURE "RemusDeviceModelState"
#define STUBDOM_CONSOLE_LOGGING 0
#define STUBDOM_CONSOLE_SAVE 1
#define STUBDOM_CONSOLE_RESTORE 2
@@ -273,7 +274,8 @@
libxl_domain_type type,
int live, int debug);
_hidden const char *libxl__device_model_savefile(libxl__gc *gc, uint32_t domid);
-_hidden int libxl__domain_save_device_model(libxl__gc *gc, uint32_t domid, int fd);
+_hidden int libxl__domain_save_device_model(libxl__gc *gc, uint32_t domid, int fd,
+ int remus);
_hidden void libxl__userdata_destroyall(libxl__gc *gc, uint32_t domid);
_hidden int libxl__domain_pvcontrol_available(libxl__gc *gc, uint32_t domid);
@@ -616,6 +618,10 @@
_hidden int libxl__qmp_pci_add(libxl__gc *gc, int d, libxl_device_pci *pcidev);
_hidden int libxl__qmp_pci_del(libxl__gc *gc, int domid,
libxl_device_pci *pcidev);
+/* Suspend QEMU. */
+_hidden int libxl__qmp_stop(libxl__gc *gc, int domid);
+/* Resume QEMU. */
+_hidden int libxl__qmp_resume(libxl__gc *gc, int domid);
/* Save current QEMU state into fd. */
_hidden int libxl__qmp_migrate(libxl__gc *gc, int domid, int fd);
/* close and free the QMP handler */
diff -r 80fdf2182bc6 -r 11fb1dfda7de tools/libxl/libxl_qmp.c
--- a/tools/libxl/libxl_qmp.c Sat Jan 21 17:15:40 2012 +0000
+++ b/tools/libxl/libxl_qmp.c Mon Jan 23 14:43:58 2012 -0800
@@ -878,6 +878,38 @@
return rc;
}
+int libxl__qmp_stop(libxl__gc *gc, int domid)
+{
+ libxl__qmp_handler *qmp = NULL;
+ int rc = 0;
+
+ qmp = libxl__qmp_initialize(libxl__gc_owner(gc), domid);
+ if (!qmp)
+ return ERROR_FAIL;
+
+ rc = qmp_synchronous_send(qmp, "stop", NULL,
+ NULL, NULL, qmp->timeout);
+
+ libxl__qmp_close(qmp);
+ return rc;
+}
+
+int libxl__qmp_resume(libxl__gc *gc, int domid)
+{
+ libxl__qmp_handler *qmp = NULL;
+ int rc = 0;
+
+ qmp = libxl__qmp_initialize(libxl__gc_owner(gc), domid);
+ if (!qmp)
+ return ERROR_FAIL;
+
+ rc = qmp_synchronous_send(qmp, "cont", NULL,
+ NULL, NULL, qmp->timeout);
+
+ libxl__qmp_close(qmp);
+ return rc;
+}
+
int libxl__qmp_initializations(libxl_ctx *ctx, uint32_t domid)
{
libxl__qmp_handler *qmp = NULL;
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH 2 of 3] tools/libxl: suspend/postflush/commit callbacks
2012-01-23 22:46 [PATCH 0 of 3] Remus - libxl support rshriram
2012-01-23 22:46 ` [PATCH 1 of 3] tools/libxl: QEMU device model suspend/resume rshriram
@ 2012-01-23 22:46 ` rshriram
2012-01-25 11:22 ` Ian Campbell
2012-01-23 22:46 ` [PATCH 3 of 3] tools/libxl: xl remus/remus-receive commands rshriram
2 siblings, 1 reply; 13+ messages in thread
From: rshriram @ 2012-01-23 22:46 UTC (permalink / raw)
To: xen-devel; +Cc: brendan, ian.jackson
# HG changeset patch
# User Shriram Rajagopalan <rshriram@cs.ubc.ca>
# Date 1327358640 28800
# Node ID 0446591bee86eb4e767d75b70c885549c7a3cfef
# Parent 11fb1dfda7de4d6759dec87d80cd16cf137f7369
tools/libxl: suspend/postflush/commit callbacks
* Add libxl callbacks for Remus checkpoint suspend, postflush (aka resume)
and checkpoint commit callbacks.
* suspend callback just bounces off libxl__domain_suspend_common_callback
* resume callback directly calls xc_domain_resume instead of re-using
libxl_domain_resume (as the latter does not set the fast suspend argument
to xc_domain_resume - aka suspend_cancel support).
* commit callback just sleeps for the checkpoint interval (for the moment).
* Future patches will augument these callbacks with more functionalities like
issuing network buffer plug/unplug commands, disk checkpoint commands, etc.
Signed-off-by: Shriram Rajagopalan <rshriram@cs.ubc.ca>
diff -r 11fb1dfda7de -r 0446591bee86 tools/libxl/libxl.c
--- a/tools/libxl/libxl.c Mon Jan 23 14:43:58 2012 -0800
+++ b/tools/libxl/libxl.c Mon Jan 23 14:44:00 2012 -0800
@@ -475,7 +475,9 @@
int debug = info != NULL && info->flags & XL_SUSPEND_DEBUG;
int rc = 0;
- rc = libxl__domain_suspend_common(gc, domid, fd, type, live, debug);
+ rc = libxl__domain_suspend_common(gc, domid, fd, type, live, debug,
+ /* No Remus */ NULL);
+
if (!rc && type == LIBXL_DOMAIN_TYPE_HVM)
rc = libxl__domain_save_device_model(gc, domid, fd, /* No Remus */ 0);
GC_FREE;
diff -r 11fb1dfda7de -r 0446591bee86 tools/libxl/libxl.h
--- a/tools/libxl/libxl.h Mon Jan 23 14:43:58 2012 -0800
+++ b/tools/libxl/libxl.h Mon Jan 23 14:44:00 2012 -0800
@@ -212,6 +212,12 @@
int (*suspend_callback)(void *, int);
} libxl_domain_suspend_info;
+typedef struct {
+ int interval;
+ int blackhole;
+ int compression;
+} libxl_domain_remus_info;
+
enum {
ERROR_NONSPECIFIC = -1,
ERROR_VERSION = -2,
diff -r 11fb1dfda7de -r 0446591bee86 tools/libxl/libxl_dom.c
--- a/tools/libxl/libxl_dom.c Mon Jan 23 14:43:58 2012 -0800
+++ b/tools/libxl/libxl_dom.c Mon Jan 23 14:44:00 2012 -0800
@@ -395,6 +395,8 @@
int hvm;
unsigned int flags;
int guest_responded;
+ int save_fd; /* Migration stream fd (for Remus) */
+ int interval; /* remus checkpoint interval */
};
static int libxl__domain_suspend_common_switch_qemu_logdirty(int domid, unsigned int enable, void *data)
@@ -581,9 +583,69 @@
return 0;
}
+static int libxl__remus_domain_suspend_callback(void *data)
+{
+ struct suspendinfo *si = data;
+
+ if (libxl__domain_suspend_common_callback(data)) {
+ if (si->hvm) {
+ if (!libxl__remus_domain_suspend_qemu(si->gc, si->domid))
+ return 1;
+ else
+ return 0;
+ }
+ return 1;
+ }
+ return 0;
+}
+
+static int libxl__remus_domain_resume_callback(void *data)
+{
+ struct suspendinfo *si = data;
+ libxl_ctx *ctx = libxl__gc_owner(si->gc);
+
+ /* Assumes that SUSPEND_CANCEL is available - just like
+ * xm version of Remus. Without that, remus wont work.
+ * Since libxl_domain_resume calls xc_domain_resume with
+ * fast_suspend = 0, we cant re-use that api.
+ */
+ if (xc_domain_resume(ctx->xch, si->domid, 1)) {
+ LIBXL__LOG(ctx, LIBXL__LOG_ERROR,
+ "xc_domain_resume failed for domain %u",
+ si->domid);
+ return 0;
+ }
+ if (!xs_resume_domain(ctx->xsh, si->domid)) {
+ LIBXL__LOG(ctx, LIBXL__LOG_ERROR,
+ "xs_resume_domain failed for domain %u",
+ si->domid);
+ return 0;
+ }
+
+ if (si->hvm) {
+ if (libxl__remus_domain_resume_qemu(si->gc, si->domid))
+ return 0;
+ }
+ return 1;
+}
+
+/* For now, just sleep. Later, we need to release disk/netbuf */
+static int libxl__remus_domain_checkpoint_callback(void *data)
+{
+ struct suspendinfo *si = data;
+
+ if (si->hvm && libxl__domain_save_device_model(si->gc, si->domid,
+ si->save_fd, 1))
+ return 0;
+
+ usleep(si->interval * 1000);
+ return 1;
+}
+
int libxl__domain_suspend_common(libxl__gc *gc, uint32_t domid, int fd,
libxl_domain_type type,
- int live, int debug)
+ int live, int debug,
+ const libxl_domain_remus_info *r_info)
{
libxl_ctx *ctx = libxl__gc_owner(gc);
int flags;
@@ -614,10 +676,20 @@
return ERROR_INVAL;
}
+ memset(&si, 0, sizeof(si));
flags = (live) ? XCFLAGS_LIVE : 0
| (debug) ? XCFLAGS_DEBUG : 0
| (hvm) ? XCFLAGS_HVM : 0;
+ if (r_info != NULL) {
+ si.interval = r_info->interval;
+ if (r_info->compression)
+ flags |= XCFLAGS_CHECKPOINT_COMPRESS;
+ si.save_fd = fd;
+ }
+ else
+ si.save_fd = -1;
+
si.domid = domid;
si.flags = flags;
si.hvm = hvm;
@@ -641,7 +713,13 @@
}
memset(&callbacks, 0, sizeof(callbacks));
- callbacks.suspend = libxl__domain_suspend_common_callback;
+ if (r_info != NULL) {
+ callbacks.suspend = libxl__remus_domain_suspend_callback;
+ callbacks.postcopy = libxl__remus_domain_resume_callback;
+ callbacks.checkpoint = libxl__remus_domain_checkpoint_callback;
+ } else
+ callbacks.suspend = libxl__domain_suspend_common_callback;
+
callbacks.switch_qemu_logdirty = libxl__domain_suspend_common_switch_qemu_logdirty;
callbacks.data = &si;
diff -r 11fb1dfda7de -r 0446591bee86 tools/libxl/libxl_internal.h
--- a/tools/libxl/libxl_internal.h Mon Jan 23 14:43:58 2012 -0800
+++ b/tools/libxl/libxl_internal.h Mon Jan 23 14:44:00 2012 -0800
@@ -272,7 +272,8 @@
int fd);
_hidden int libxl__domain_suspend_common(libxl__gc *gc, uint32_t domid, int fd,
libxl_domain_type type,
- int live, int debug);
+ int live, int debug,
+ const libxl_domain_remus_info *r_info);
_hidden const char *libxl__device_model_savefile(libxl__gc *gc, uint32_t domid);
_hidden int libxl__domain_save_device_model(libxl__gc *gc, uint32_t domid, int fd,
int remus);
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH 3 of 3] tools/libxl: xl remus/remus-receive commands
2012-01-23 22:46 [PATCH 0 of 3] Remus - libxl support rshriram
2012-01-23 22:46 ` [PATCH 1 of 3] tools/libxl: QEMU device model suspend/resume rshriram
2012-01-23 22:46 ` [PATCH 2 of 3] tools/libxl: suspend/postflush/commit callbacks rshriram
@ 2012-01-23 22:46 ` rshriram
2012-01-25 11:52 ` Ian Campbell
2 siblings, 1 reply; 13+ messages in thread
From: rshriram @ 2012-01-23 22:46 UTC (permalink / raw)
To: xen-devel; +Cc: brendan, ian.jackson
# HG changeset patch
# User Shriram Rajagopalan <rshriram@cs.ubc.ca>
# Date 1327358642 28800
# Node ID 822536df4aeced5aee00f1f26299086faa622681
# Parent 0446591bee86eb4e767d75b70c885549c7a3cfef
tools/libxl: xl remus/remus-receive commands
* xl remus (and its receive counterpart remus-receive) act as frontends
to enable remus for a given domain.
* At the moment, only memory checkpointing and blackhole replication are
supported. Support for disk checkpointing and network buffering will
be added in future.
* xl remus borrows some aspects of xl migrate. Replication is currently
done over a ssh connection. Future versions will use a low-overhead
plain tcp socket for replication (similar to xend/remus).
Signed-off-by: Shriram Rajagopalan <rshriram@cs.ubc.ca>
diff -r 0446591bee86 -r 822536df4aec tools/libxl/libxl.c
--- a/tools/libxl/libxl.c Mon Jan 23 14:44:00 2012 -0800
+++ b/tools/libxl/libxl.c Mon Jan 23 14:44:02 2012 -0800
@@ -466,6 +466,40 @@
return ptr;
}
+int libxl_domain_remus_start(libxl_ctx *ctx, libxl_domain_remus_info *info,
+ uint32_t domid, int fd)
+{
+ GC_INIT(ctx);
+ libxl_domain_type type = libxl__domain_type(gc, domid);
+ int rc = 0;
+
+ if (info == NULL) {
+ LIBXL__LOG(ctx, LIBXL__LOG_ERROR,
+ "No remus_info structure supplied for domain %d", domid);
+ rc = -1;
+ goto remus_fail;
+ }
+
+ /* TBD: Remus setup - i.e. attach qdisc, enable disk buffering, etc */
+
+ /* Point of no return */
+ rc = libxl__domain_suspend_common(gc, domid, fd, type, /* live */ 1,
+ /* debug */ 0, info);
+
+ /*
+ * With Remus, if we reach this point, it means either
+ * backup died or some network error occurred preventing us
+ * from sending checkpoints.
+ */
+
+ /* TBD: Remus cleanup - i.e. detach qdisc, release other
+ * resources.
+ */
+ remus_fail:
+ GC_FREE;
+ return rc;
+}
+
int libxl_domain_suspend(libxl_ctx *ctx, libxl_domain_suspend_info *info,
uint32_t domid, int fd)
{
diff -r 0446591bee86 -r 822536df4aec tools/libxl/libxl.h
--- a/tools/libxl/libxl.h Mon Jan 23 14:44:00 2012 -0800
+++ b/tools/libxl/libxl.h Mon Jan 23 14:44:02 2012 -0800
@@ -272,6 +272,8 @@
int libxl_domain_create_new(libxl_ctx *ctx, libxl_domain_config *d_config, libxl_console_ready cb, void *priv, uint32_t *domid);
int libxl_domain_create_restore(libxl_ctx *ctx, libxl_domain_config *d_config, libxl_console_ready cb, void *priv, uint32_t *domid, int restore_fd);
void libxl_domain_config_dispose(libxl_domain_config *d_config);
+int libxl_domain_remus_start(libxl_ctx *ctx, libxl_domain_remus_info *info,
+ uint32_t domid, int fd);
int libxl_domain_suspend(libxl_ctx *ctx, libxl_domain_suspend_info *info,
uint32_t domid, int fd);
int libxl_domain_resume(libxl_ctx *ctx, uint32_t domid);
diff -r 0446591bee86 -r 822536df4aec tools/libxl/xl.h
--- a/tools/libxl/xl.h Mon Jan 23 14:44:00 2012 -0800
+++ b/tools/libxl/xl.h Mon Jan 23 14:44:02 2012 -0800
@@ -93,6 +93,8 @@
int main_getenforce(int argc, char **argv);
int main_setenforce(int argc, char **argv);
int main_loadpolicy(int argc, char **argv);
+int main_remus_receive(int argc, char **argv);
+int main_remus(int argc, char **argv);
void help(const char *command);
diff -r 0446591bee86 -r 822536df4aec tools/libxl/xl_cmdimpl.c
--- a/tools/libxl/xl_cmdimpl.c Mon Jan 23 14:44:00 2012 -0800
+++ b/tools/libxl/xl_cmdimpl.c Mon Jan 23 14:44:02 2012 -0800
@@ -1814,7 +1814,7 @@
* If we have daemonized then do not return to the caller -- this has
* already happened in the parent.
*/
- if ( !need_daemon )
+ if ( daemonize && !need_daemon )
exit(ret);
return ret;
@@ -5853,6 +5853,175 @@
return ret;
}
+int main_remus_receive(int argc, char **argv)
+{
+ int rc;
+ char *migration_domname;
+ struct domain_create dom_info;
+
+ signal(SIGPIPE, SIG_IGN);
+ memset(&dom_info, 0, sizeof(dom_info));
+ dom_info.debug = 1;
+ dom_info.no_incr_generationid = 1;
+ dom_info.restore_file = "incoming checkpoint stream";
+ dom_info.migrate_fd = 0; /* stdin - will change in future*/
+ dom_info.migration_domname_r = &migration_domname;
+
+ rc = create_domain(&dom_info);
+ if (rc < 0) {
+ fprintf(stderr, "migration target (Remus): Domain creation failed"
+ " (code %d) domid %u.\n", rc, domid);
+ exit(-rc);
+ }
+
+ /* If we are here, it means that the sender (primary) has crashed.
+ * If domain renaming fails, lets just continue (as we need the domain
+ * to be up & dom names may not matter much, as long as its reachable
+ * over network).
+ *
+ * If domain unpausing fails, destroy domain ? Or is it better to have
+ * a consistent copy of the domain (memory, cpu state, disk)
+ * on atleast one physical host ? Right now, lets just leave the domain
+ * as is and let the Administrator decide (or troubleshoot).
+ */
+ fprintf(stderr, "migration target: Remus Failover for domain %u\n", domid);
+ if (migration_domname) {
+ rc = libxl_domain_rename(ctx, domid, migration_domname,
+ common_domname);
+ if (rc)
+ fprintf(stderr, "migration target (Remus): "
+ "Failed to rename domain from %s to %s:%d\n",
+ migration_domname, common_domname, rc);
+
+ rc = libxl_domain_unpause(ctx, domid);
+ if (rc)
+ fprintf(stderr, "migration target (Remus): "
+ "Failed to unpause domain %s (id: %u):%d\n",
+ common_domname, domid, rc);
+ }
+
+ return -rc;
+}
+
+int main_remus(int argc, char **argv)
+{
+ int opt, rc;
+ const char *ssh_command = "ssh";
+ char *host = NULL, *rune = NULL, *domain = NULL;
+
+ int sendpipe[2], recvpipe[2];
+ int send_fd = -1, recv_fd = -1;
+ pid_t child = -1;
+
+ uint8_t *config_data;
+ int config_len;
+
+ libxl_domain_remus_info r_info;
+
+ memset(&r_info, 0, sizeof(libxl_domain_remus_info));
+ /* Defaults */
+ r_info.interval = 200;
+ r_info.blackhole = 0;
+ r_info.compression = 1;
+
+ while ((opt = def_getopt(argc, argv, "bui:s:", "remus", 2)) != -1) {
+ switch (opt) {
+ case 0: case 2:
+ return opt;
+
+ case 'i':
+ r_info.interval = atoi(optarg);
+ break;
+ case 'b':
+ r_info.blackhole = 1;
+ break;
+ case 'u':
+ r_info.compression = 0;
+ break;
+ case 's':
+ ssh_command = optarg;
+ break;
+ }
+ }
+
+ domain = argv[optind];
+ host = argv[optind + 1];
+
+ if (r_info.blackhole) {
+ find_domain(domain);
+ send_fd = open("/dev/null", O_RDWR, 0644);
+ if (send_fd < 0) {
+ perror("failed to open /dev/null");
+ exit(-1);
+ }
+ } else {
+
+ if (!ssh_command[0]) {
+ rune = host;
+ } else {
+ if (asprintf(&rune, "exec %s %s xl remus-receive",
+ ssh_command, host) < 0)
+ return 1;
+ }
+
+ save_domain_core_begin(domain, NULL, &config_data, &config_len);
+
+ if (!config_len) {
+ fprintf(stderr, "No config file stored for running domain and "
+ "none supplied - cannot start remus.\n");
+ exit(1);
+ }
+
+ MUST( libxl_pipe(ctx, sendpipe) );
+ MUST( libxl_pipe(ctx, recvpipe) );
+
+ child = libxl_fork(ctx);
+ if (child==-1) exit(1);
+
+ /* TODO: change this to plain TCP socket based channel
+ * instead of SSH.
+ */
+ if (!child) {
+ dup2(sendpipe[0], 0);
+ dup2(recvpipe[1], 1);
+ close(sendpipe[0]); close(sendpipe[1]);
+ close(recvpipe[0]); close(recvpipe[1]);
+ execlp("sh","sh","-c",rune,(char*)0);
+ perror("failed to exec sh");
+ exit(-1);
+ }
+
+ close(sendpipe[0]);
+ close(recvpipe[1]);
+ send_fd = sendpipe[1];
+ recv_fd = recvpipe[0];
+
+ signal(SIGPIPE, SIG_IGN);
+
+ save_domain_core_writeconfig(send_fd, "migration stream",
+ config_data, config_len);
+ }
+
+ /* Point of no return */
+ rc = libxl_domain_remus_start(ctx, &r_info, domid, send_fd);
+
+ /* If we are here, it means backup has failed/domain suspend failed.
+ * Try to resume the domain and exit gracefully.
+ */
+ fprintf(stderr, "remus sender: libxl_domain_suspend failed"
+ " (rc=%d)\n", rc);
+
+ if (rc == ERROR_GUEST_TIMEDOUT)
+ fprintf(stderr, "Failed to suspend domain at primary.\n");
+ else {
+ fprintf(stderr, "Remus: Backup failed? resuming domain at primary.\n");
+ libxl_domain_resume(ctx, domid);
+ }
+
+ close(send_fd);
+ return -ERROR_FAIL;
+}
+
/*
* Local variables:
* mode: C
diff -r 0446591bee86 -r 822536df4aec tools/libxl/xl_cmdtable.c
--- a/tools/libxl/xl_cmdtable.c Mon Jan 23 14:44:00 2012 -0800
+++ b/tools/libxl/xl_cmdtable.c Mon Jan 23 14:44:02 2012 -0800
@@ -407,6 +407,22 @@
"Loads a new policy int the Flask Xen security module",
"<policy file>",
},
+ { "remus-receive",
+ &main_remus_receive, 0,
+ "Remus Checkpoint Receiver",
+ "- for internal use only",
+ },
+ { "remus",
+ &main_remus, 0,
+ "Enable Remus HA for domain",
+ "[options] <Domain> [<DestinationHost>]",
+ "-i MS Checkpoint domain memory every MS milliseconds (def. 200ms).\n"
+ "-b Replicate memory checkpoints to /dev/null (blackhole)\n"
+ "-u Disable memory checkpoint compression.\n"
+ "-s <sshcommand> Use <sshcommand> instead of ssh. String will be passed\n"
+ " to sh. If empty, run <host> instead of \n"
+ " ssh <host> xl remus-receive\n"
+ },
};
int cmdtable_len = sizeof(cmd_table)/sizeof(struct cmd_spec);
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 1 of 3] tools/libxl: QEMU device model suspend/resume
2012-01-23 22:46 ` [PATCH 1 of 3] tools/libxl: QEMU device model suspend/resume rshriram
@ 2012-01-25 11:08 ` Ian Campbell
2012-01-25 23:07 ` Shriram Rajagopalan
0 siblings, 1 reply; 13+ messages in thread
From: Ian Campbell @ 2012-01-25 11:08 UTC (permalink / raw)
To: rshriram@cs.ubc.ca
Cc: brendan@cs.ubc.ca, xen-devel@lists.xensource.com, Ian Jackson
On Mon, 2012-01-23 at 22:46 +0000, rshriram@cs.ubc.ca wrote:
> # HG changeset patch
> # User Shriram Rajagopalan <rshriram@cs.ubc.ca>
> # Date 1327358638 28800
> # Node ID 11fb1dfda7de4d6759dec87d80cd16cf137f7369
> # Parent 80fdf2182bc62ca358ba2f1a3513b47a4f8d9dfd
> tools/libxl: QEMU device model suspend/resume
>
> * Refactor the libxl__domain_save_device_model.
> * Add support for suspend/resume QEMU device model
> (both traditional xen and QMP).
>
>
> Signed-off-by: Shriram Rajagopalan <rshriram@cs.ubc.ca>
>
> diff -r 80fdf2182bc6 -r 11fb1dfda7de tools/libxl/libxl.c
> --- a/tools/libxl/libxl.c Sat Jan 21 17:15:40 2012 +0000
> +++ b/tools/libxl/libxl.c Mon Jan 23 14:43:58 2012 -0800
> @@ -477,7 +477,7 @@
>
> rc = libxl__domain_suspend_common(gc, domid, fd, type, live, debug);
> if (!rc && type == LIBXL_DOMAIN_TYPE_HVM)
> - rc = libxl__domain_save_device_model(gc, domid, fd);
> + rc = libxl__domain_save_device_model(gc, domid, fd, /* No Remus */ 0);
> GC_FREE;
> return rc;
> }
> diff -r 80fdf2182bc6 -r 11fb1dfda7de tools/libxl/libxl_dom.c
> --- a/tools/libxl/libxl_dom.c Sat Jan 21 17:15:40 2012 +0000
> +++ b/tools/libxl/libxl_dom.c Mon Jan 23 14:43:58 2012 -0800
> @@ -534,6 +534,53 @@
> return 0;
> }
>
> +static int libxl__remus_domain_suspend_qemu(libxl__gc *gc, uint32_t domid)
> +{
> +
> + switch (libxl__device_model_version_running(gc, domid)) {
> + case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL: {
> + char *path = NULL;
> + path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/command",
> + domid);
> + libxl__xs_write(gc, XBT_NULL, path, "save");
> + libxl__wait_for_device_model(gc, domid, "paused", NULL, NULL, NULL);
This and libxl__remus_domain_resume_qemu, qemu_pci_add_xenstore,
qemu_pci_remove_xenstore, libxl__domain_save_device_model and
libxl_domain_unpause would probably benefit from a helper function to
send a command to a traditional qemu.
> + break;
> + }
> + case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN:
> + /* Stop QEMU */
> + if (libxl__qmp_stop(gc, domid))
> + return ERROR_FAIL;
> + break;
> + default:
> + return ERROR_INVAL;
> + }
> +
> + return 0;
> +}
> +
> +static int libxl__remus_domain_resume_qemu(libxl__gc *gc, uint32_t domid)
> +{
> +
> + switch (libxl__device_model_version_running(gc, domid)) {
> + case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL: {
> + char *path = NULL;
> + path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/command",
> + domid);
> + libxl__xs_write(gc, XBT_NULL, path, "continue");
> + break;
> + }
> + case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN:
> + /* Stop QEMU */
> + if (libxl__qmp_resume(gc, domid))
> + return ERROR_FAIL;
> + break;
> + default:
> + return ERROR_INVAL;
> + }
> +
> + return 0;
> +}
> +
> int libxl__domain_suspend_common(libxl__gc *gc, uint32_t domid, int fd,
> libxl_domain_type type,
> int live, int debug)
> @@ -620,7 +667,7 @@
> return rc;
> }
>
> -int libxl__domain_save_device_model(libxl__gc *gc, uint32_t domid, int fd)
> +int libxl__domain_save_device_model(libxl__gc *gc, uint32_t domid, int fd, int remus)
> {
> libxl_ctx *ctx = libxl__gc_owner(gc);
> int ret, fd2 = -1, c;
> @@ -631,13 +678,12 @@
>
> switch (libxl__device_model_version_running(gc, domid)) {
> case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL: {
> - char *path = NULL;
> - LIBXL__LOG(ctx, LIBXL__LOG_DEBUG,
> - "Saving device model state to %s", filename);
> - path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/command",
> - domid);
> - libxl__xs_write(gc, XBT_NULL, path, "save");
> - libxl__wait_for_device_model(gc, domid, "paused", NULL, NULL, NULL);
> + /* For Remus,we issue suspend_qemu call separately */
Why?
How does this interact with Stefano's XC_SAVEID_TOOLSTACK patches?
I think some sort of high level description of the control flow used by
Remus and how/why it differs from normal save/restore would be useful
for review.
> + if (!remus) {
> + c = libxl__remus_domain_suspend_qemu(gc, domid);
It seems odd to call this function libxl__remus_FOO and the use it
when !remus. The function doesn't look to be inherently specific to
either remus or !remus anyhow.
> + if (c)
> + return c;
> + }
> break;
> }
> case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN:
> @@ -668,8 +714,9 @@
> qemu_state_len = st.st_size;
> LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "Qemu state is %d bytes\n", qemu_state_len);
>
> - ret = libxl_write_exactly(ctx, fd, QEMU_SIGNATURE, strlen(QEMU_SIGNATURE),
> - "saved-state file", "qemu signature");
> + ret = libxl_write_exactly(ctx, fd, remus ? REMUS_QEMU_SIGNATURE : QEMU_SIGNATURE,
> + remus ? strlen(REMUS_QEMU_SIGNATURE): strlen(QEMU_SIGNATURE),
> + "saved-state file", "qemu signature");
QEMU_SIGNATURE is "DeviceModelRecord0002" which I believe libxc treats
identically to the Remus signature?
Again consideration of how this interacts with Stefano's patch would be
useful. If possible we'd quite like to pull the qemu-restore our of
xc_domain_restore for consistency with how xc_domain_save saves it, the
current layering is quite mad.
Ian.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 2 of 3] tools/libxl: suspend/postflush/commit callbacks
2012-01-23 22:46 ` [PATCH 2 of 3] tools/libxl: suspend/postflush/commit callbacks rshriram
@ 2012-01-25 11:22 ` Ian Campbell
2012-01-25 23:15 ` Shriram Rajagopalan
0 siblings, 1 reply; 13+ messages in thread
From: Ian Campbell @ 2012-01-25 11:22 UTC (permalink / raw)
To: rshriram@cs.ubc.ca
Cc: brendan@cs.ubc.ca, xen-devel@lists.xensource.com, Ian Jackson
On Mon, 2012-01-23 at 22:46 +0000, rshriram@cs.ubc.ca wrote:
> # HG changeset patch
> # User Shriram Rajagopalan <rshriram@cs.ubc.ca>
> # Date 1327358640 28800
> # Node ID 0446591bee86eb4e767d75b70c885549c7a3cfef
> # Parent 11fb1dfda7de4d6759dec87d80cd16cf137f7369
> tools/libxl: suspend/postflush/commit callbacks
>
> * Add libxl callbacks for Remus checkpoint suspend, postflush (aka resume)
> and checkpoint commit callbacks.
> * suspend callback just bounces off libxl__domain_suspend_common_callback
> * resume callback directly calls xc_domain_resume instead of re-using
> libxl_domain_resume (as the latter does not set the fast suspend argument
> to xc_domain_resume - aka suspend_cancel support).
> * commit callback just sleeps for the checkpoint interval (for the moment).
>
> * Future patches will augument these callbacks with more functionalities like
> issuing network buffer plug/unplug commands, disk checkpoint commands, etc.
>
> Signed-off-by: Shriram Rajagopalan <rshriram@cs.ubc.ca>
>
> diff -r 11fb1dfda7de -r 0446591bee86 tools/libxl/libxl.h
> --- a/tools/libxl/libxl.h Mon Jan 23 14:43:58 2012 -0800
> +++ b/tools/libxl/libxl.h Mon Jan 23 14:44:00 2012 -0800
> @@ -212,6 +212,12 @@
> int (*suspend_callback)(void *, int);
> } libxl_domain_suspend_info;
>
> +typedef struct {
> + int interval;
> + int blackhole;
> + int compression;
> +} libxl_domain_remus_info;
This should be defined via the libxl IDL (see libxl_types.idl and
idl.txt)
> +
> enum {
> ERROR_NONSPECIFIC = -1,
> ERROR_VERSION = -2,
> diff -r 11fb1dfda7de -r 0446591bee86 tools/libxl/libxl_dom.c
> --- a/tools/libxl/libxl_dom.c Mon Jan 23 14:43:58 2012 -0800
> +++ b/tools/libxl/libxl_dom.c Mon Jan 23 14:44:00 2012 -0800
> @@ -395,6 +395,8 @@
> int hvm;
> unsigned int flags;
> int guest_responded;
> + int save_fd; /* Migration stream fd (for Remus) */
> + int interval; /* remus checkpoint interval */
> };
>
> static int libxl__domain_suspend_common_switch_qemu_logdirty(int domid, unsigned int enable, void *data)
> @@ -581,9 +583,69 @@
> return 0;
> }
>
> +static int libxl__remus_domain_suspend_callback(void *data)
> +{
> + struct suspendinfo *si = data;
> +
> + if (libxl__domain_suspend_common_callback(data)) {
if (!libxl_domain_suspend_common_callback)
return 1;
if (si->hvm && !libxl__remus_domain_suspend(...)
return 1;
return 0;
Is would be clearer than the multiple nested if/elses and returns.
> + if (si->hvm) {
> + if (!libxl__remus_domain_suspend_qemu(si->gc, si->domid))
Perhaps libxl__domain_suspend_common_callback should do this in both the
Remus and non-Remus cases?
Likewise perhaps normal save should use the checkpoint hook to call
libxl__domain_save_device_model() as well.
I'm in favour of tweaking the normal suspend/resume case as necessary to
reduce the amount of Remus special casing that is required. IMHO a
normal suspend should look a lot like a Remus suspend which happened to
only do a single iteration.
> + return 1;
> + else
> + return 0;
> + }
> + return 1;
> + }
> + return 0;
> +}
> +
> +static int libxl__remus_domain_resume_callback(void *data)
> +{
> + struct suspendinfo *si = data;
> + libxl_ctx *ctx = libxl__gc_owner(si->gc);
> +
> + /* Assumes that SUSPEND_CANCEL is available - just like
> + * xm version of Remus. Without that, remus wont work.
> + * Since libxl_domain_resume calls xc_domain_resume with
> + * fast_suspend = 0, we cant re-use that api.
You could modify that API which would be better than duplicating its
content. I think the "fast" flag is useful to expose to users of libxl
but if not then you could refactor into an internal function which takes
the flag and call it from both here and libxl_domain_resume.
> + */
> + if (xc_domain_resume(ctx->xch, si->domid, 1)) {
> + LIBXL__LOG(ctx, LIBXL__LOG_ERROR,
> + "xc_domain_resume failed for domain %u",
> + si->domid);
> + return 0;
> + }
> + if (!xs_resume_domain(ctx->xsh, si->domid)) {
> + LIBXL__LOG(ctx, LIBXL__LOG_ERROR,
> + "xs_resume_domain failed for domain %u",
> + si->domid);
> + return 0;
> + }
> +
> + if (si->hvm) {
> + if (libxl__remus_domain_resume_qemu(si->gc, si->domid))
> + return 0;
> + }
> + return 1;
> +}
> +
> +/* For now, just sleep. Later, we need to release disk/netbuf */
> +static int libxl__remus_domain_checkpoint_callback(void *data)
> +{
> + struct suspendinfo *si = data;
> +
> + if (si->hvm && libxl__domain_save_device_model(si->gc, si->domid,
> + si->save_fd, 1))
> + return 0;
> +
> + usleep(si->interval * 1000);
> + return 1;
> +}
> +
> int libxl__domain_suspend_common(libxl__gc *gc, uint32_t domid, int fd,
> libxl_domain_type type,
> - int live, int debug)
> + int live, int debug,
> + const libxl_domain_remus_info *r_info)
> {
> libxl_ctx *ctx = libxl__gc_owner(gc);
> int flags;
> @@ -614,10 +676,20 @@
Please can you add :
[diff]
showfunc = True
to your ~/.hgrc.
> return ERROR_INVAL;
> }
>
> + memset(&si, 0, sizeof(si));
> flags = (live) ? XCFLAGS_LIVE : 0
> | (debug) ? XCFLAGS_DEBUG : 0
> | (hvm) ? XCFLAGS_HVM : 0;
>
> + if (r_info != NULL) {
> + si.interval = r_info->interval;
> + if (r_info->compression)
> + flags |= XCFLAGS_CHECKPOINT_COMPRESS;
> + si.save_fd = fd;
> + }
> + else
> + si.save_fd = -1;
> +
> si.domid = domid;
> si.flags = flags;
> si.hvm = hvm;
> @@ -641,7 +713,13 @@
> }
>
> memset(&callbacks, 0, sizeof(callbacks));
> - callbacks.suspend = libxl__domain_suspend_common_callback;
> + if (r_info != NULL) {
> + callbacks.suspend = libxl__remus_domain_suspend_callback;
> + callbacks.postcopy = libxl__remus_domain_resume_callback;
> + callbacks.checkpoint = libxl__remus_domain_checkpoint_callback;
> + } else
> + callbacks.suspend = libxl__domain_suspend_common_callback;
> +
> callbacks.switch_qemu_logdirty = libxl__domain_suspend_common_switch_qemu_logdirty;
> callbacks.data = &si;
>
> diff -r 11fb1dfda7de -r 0446591bee86 tools/libxl/libxl_internal.h
> --- a/tools/libxl/libxl_internal.h Mon Jan 23 14:43:58 2012 -0800
> +++ b/tools/libxl/libxl_internal.h Mon Jan 23 14:44:00 2012 -0800
> @@ -272,7 +272,8 @@
> int fd);
> _hidden int libxl__domain_suspend_common(libxl__gc *gc, uint32_t domid, int fd,
> libxl_domain_type type,
> - int live, int debug);
> + int live, int debug,
> + const libxl_domain_remus_info *r_info);
> _hidden const char *libxl__device_model_savefile(libxl__gc *gc, uint32_t domid);
> _hidden int libxl__domain_save_device_model(libxl__gc *gc, uint32_t domid, int fd,
> int remus);
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 3 of 3] tools/libxl: xl remus/remus-receive commands
2012-01-23 22:46 ` [PATCH 3 of 3] tools/libxl: xl remus/remus-receive commands rshriram
@ 2012-01-25 11:52 ` Ian Campbell
2012-01-26 0:09 ` Shriram Rajagopalan
0 siblings, 1 reply; 13+ messages in thread
From: Ian Campbell @ 2012-01-25 11:52 UTC (permalink / raw)
To: rshriram@cs.ubc.ca
Cc: brendan@cs.ubc.ca, xen-devel@lists.xensource.com, Ian Jackson
On Mon, 2012-01-23 at 22:46 +0000, rshriram@cs.ubc.ca wrote:
> # HG changeset patch
> # User Shriram Rajagopalan <rshriram@cs.ubc.ca>
> # Date 1327358642 28800
> # Node ID 822536df4aeced5aee00f1f26299086faa622681
> # Parent 0446591bee86eb4e767d75b70c885549c7a3cfef
> tools/libxl: xl remus/remus-receive commands
>
> * xl remus (and its receive counterpart remus-receive) act as frontends
> to enable remus for a given domain.
> * At the moment, only memory checkpointing and blackhole replication are
> supported. Support for disk checkpointing and network buffering will
> be added in future.
> * xl remus borrows some aspects of xl migrate. Replication is currently
> done over a ssh connection. Future versions will use a low-overhead
> plain tcp socket for replication (similar to xend/remus).
Please also patch docs/man/xl.*.pod as required.
Other references to relevant documentation and/or the addition of such
documentation to the tree would also be useful.
>
> Signed-off-by: Shriram Rajagopalan <rshriram@cs.ubc.ca>
>
> diff -r 0446591bee86 -r 822536df4aec tools/libxl/libxl.c
> --- a/tools/libxl/libxl.c Mon Jan 23 14:44:00 2012 -0800
> +++ b/tools/libxl/libxl.c Mon Jan 23 14:44:02 2012 -0800
> @@ -466,6 +466,40 @@
> return ptr;
> }
>
> +int libxl_domain_remus_start(libxl_ctx *ctx, libxl_domain_remus_info *info,
> + uint32_t domid, int fd)
> +{
> + GC_INIT(ctx);
> + libxl_domain_type type = libxl__domain_type(gc, domid);
> + int rc = 0;
> +
> + if (info == NULL) {
> + LIBXL__LOG(ctx, LIBXL__LOG_ERROR,
> + "No remus_info structure supplied for domain %d", domid);
> + rc = -1;
> + goto remus_fail;
> + }
> +
> + /* TBD: Remus setup - i.e. attach qdisc, enable disk buffering, etc */
> +
> + /* Point of no return */
> + rc = libxl__domain_suspend_common(gc, domid, fd, type, /* live */ 1,
> + /* debug */ 0, info);
> +
> + /*
> + * With Remus, if we reach this point, it means either
> + * backup died or some network error occurred preventing us
> + * from sending checkpoints.
> + */
> +
> + /* TBD: Remus cleanup - i.e. detach qdisc, release other
> + * resources.
> + */
> + remus_fail:
> + GC_FREE;
> + return rc;
> +}
> +
> int libxl_domain_suspend(libxl_ctx *ctx, libxl_domain_suspend_info *info,
> uint32_t domid, int fd)
> {
> diff -r 0446591bee86 -r 822536df4aec tools/libxl/xl_cmdimpl.c
> --- a/tools/libxl/xl_cmdimpl.c Mon Jan 23 14:44:00 2012 -0800
> +++ b/tools/libxl/xl_cmdimpl.c Mon Jan 23 14:44:02 2012 -0800
> @@ -1814,7 +1814,7 @@
> * If we have daemonized then do not return to the caller -- this has
> * already happened in the parent.
> */
> - if ( !need_daemon )
> + if ( daemonize && !need_daemon )
What is this change for?
> exit(ret);
>
> return ret;
> @@ -5853,6 +5853,175 @@
> return ret;
> }
>
> +int main_remus_receive(int argc, char **argv)
Can this not be done as a flag to migrate-receive? Likewise is there
common code between the remus migrate and regular migration?
Is there any reason that remus would not benefit from the xl migration
protocol preamble?
> +{
> + int rc;
> + char *migration_domname;
> + struct domain_create dom_info;
> +
> + signal(SIGPIPE, SIG_IGN);
> + memset(&dom_info, 0, sizeof(dom_info));
> + dom_info.debug = 1;
> + dom_info.no_incr_generationid = 1;
> + dom_info.restore_file = "incoming checkpoint stream";
> + dom_info.migrate_fd = 0; /* stdin - will change in future*/
> + dom_info.migration_domname_r = &migration_domname;
> +
> + rc = create_domain(&dom_info);
I'm totally failing to see where the Remus'ness of this create_domain
comes from.
> + if (rc < 0) {
> + fprintf(stderr, "migration target (Remus): Domain creation failed"
> + " (code %d) domid %u.\n", rc, domid);
> + exit(-rc);
> + }
> +
> + /* If we are here, it means that the sender (primary) has crashed.
> + * If domain renaming fails, lets just continue (as we need the domain
> + * to be up & dom names may not matter much, as long as its reachable
> + * over network).
> + *
> + * If domain unpausing fails, destroy domain ? Or is it better to have
> + * a consistent copy of the domain (memory, cpu state, disk)
> + * on atleast one physical host ? Right now, lets just leave the domain
at least
> + * as is and let the Administrator decide (or troubleshoot).
> + */
> + fprintf(stderr, "migration target: Remus Failover for domain %u\n", domid);
> + if (migration_domname) {
> + rc = libxl_domain_rename(ctx, domid, migration_domname,
> + common_domname);
> + if (rc)
> + fprintf(stderr, "migration target (Remus): "
> + "Failed to rename domain from %s to %s:%d\n",
> + migration_domname, common_domname, rc);
> +
> + rc = libxl_domain_unpause(ctx, domid);
> + if (rc)
> + fprintf(stderr, "migration target (Remus): "
> + "Failed to unpause domain %s (id: %u):%d\n",
> + common_domname, domid, rc);
> + }
> +
> + return -rc;
> +}
> +
> +int main_remus(int argc, char **argv)
> +{
> + int opt, rc;
> + const char *ssh_command = "ssh";
> + char *host = NULL, *rune = NULL, *domain = NULL;
> +
> + int sendpipe[2], recvpipe[2];
> + int send_fd = -1, recv_fd = -1;
> + pid_t child = -1;
> +
> + uint8_t *config_data;
> + int config_len;
> +
> + libxl_domain_remus_info r_info;
> +
> + memset(&r_info, 0, sizeof(libxl_domain_remus_info));
> + /* Defaults */
> + r_info.interval = 200;
> + r_info.blackhole = 0;
> + r_info.compression = 1;
> +
> + while ((opt = def_getopt(argc, argv, "bui:s:", "remus", 2)) != -1) {
main_migrate handles a bunch of other options which might also be
interesting in the Remus case? Better would be to add all this as an
option to the std migrate.
> + switch (opt) {
> + case 0: case 2:
> + return opt;
> +
> + case 'i':
> + r_info.interval = atoi(optarg);
> + break;
> + case 'b':
> + r_info.blackhole = 1;
> + break;
> + case 'u':
> + r_info.compression = 0;
> + break;
> + case 's':
> + ssh_command = optarg;
> + break;
> + }
> + }
> +
> + domain = argv[optind];
> + host = argv[optind + 1];
> +
> + if (r_info.blackhole) {
> + find_domain(domain);
> + send_fd = open("/dev/null", O_RDWR, 0644);
> + if (send_fd < 0) {
> + perror("failed to open /dev/null");
> + exit(-1);
> + }
> + } else {
The following duplicates an awful lot of main_migrate which begs for
them to either be the same underlying command or at least for some
refactoring.
> +
> + if (!ssh_command[0]) {
> + rune = host;
> + } else {
> + if (asprintf(&rune, "exec %s %s xl remus-receive",
> + ssh_command, host) < 0)
> + return 1;
> + }
> +
> + save_domain_core_begin(domain, NULL, &config_data, &config_len);
> +
> + if (!config_len) {
> + fprintf(stderr, "No config file stored for running domain and "
> + "none supplied - cannot start remus.\n");
> + exit(1);
> + }
> +
> + MUST( libxl_pipe(ctx, sendpipe) );
> + MUST( libxl_pipe(ctx, recvpipe) );
> +
> + child = libxl_fork(ctx);
> + if (child==-1) exit(1);
> +
> + /* TODO: change this to plain TCP socket based channel
> + * instead of SSH.
There are good reasons for using ssh though. However the user can supply
another command which is not encrypted if they want to.
Whatever happens here the same arguments would apply to non-remus
migration so the changes should be done for both (or better the code
should be made more common.
Ian.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 1 of 3] tools/libxl: QEMU device model suspend/resume
2012-01-25 11:08 ` Ian Campbell
@ 2012-01-25 23:07 ` Shriram Rajagopalan
2012-01-26 9:18 ` Ian Campbell
0 siblings, 1 reply; 13+ messages in thread
From: Shriram Rajagopalan @ 2012-01-25 23:07 UTC (permalink / raw)
To: Ian Campbell
Cc: brendan@cs.ubc.ca, xen-devel@lists.xensource.com, Ian Jackson
[-- Attachment #1.1: Type: text/plain, Size: 6678 bytes --]
On Wed, Jan 25, 2012 at 3:08 AM, Ian Campbell <Ian.Campbell@citrix.com>wrote:
> On Mon, 2012-01-23 at 22:46 +0000, rshriram@cs.ubc.ca wrote:
> > # HG changeset patch
> > # User Shriram Rajagopalan <rshriram@cs.ubc.ca>
> > # Date 1327358638 28800
> > # Node ID 11fb1dfda7de4d6759dec87d80cd16cf137f7369
> > # Parent 80fdf2182bc62ca358ba2f1a3513b47a4f8d9dfd
> > tools/libxl: QEMU device model suspend/resume
> >
> > * Refactor the libxl__domain_save_device_model.
> > * Add support for suspend/resume QEMU device model
> > (both traditional xen and QMP).
> >
> >
> > Signed-off-by: Shriram Rajagopalan <rshriram@cs.ubc.ca>
> >
> > diff -r 80fdf2182bc6 -r 11fb1dfda7de tools/libxl/libxl.c
> > --- a/tools/libxl/libxl.c Sat Jan 21 17:15:40 2012 +0000
> > +++ b/tools/libxl/libxl.c Mon Jan 23 14:43:58 2012 -0800
> > @@ -477,7 +477,7 @@
> >
> > rc = libxl__domain_suspend_common(gc, domid, fd, type, live, debug);
> > if (!rc && type == LIBXL_DOMAIN_TYPE_HVM)
> > - rc = libxl__domain_save_device_model(gc, domid, fd);
> > + rc = libxl__domain_save_device_model(gc, domid, fd, /* No Remus
> */ 0);
> > GC_FREE;
> > return rc;
> > }
> > diff -r 80fdf2182bc6 -r 11fb1dfda7de tools/libxl/libxl_dom.c
> > --- a/tools/libxl/libxl_dom.c Sat Jan 21 17:15:40 2012 +0000
> > +++ b/tools/libxl/libxl_dom.c Mon Jan 23 14:43:58 2012 -0800
> > @@ -534,6 +534,53 @@
> > return 0;
> > }
> >
> > +static int libxl__remus_domain_suspend_qemu(libxl__gc *gc, uint32_t
> domid)
> > +{
> > +
> > + switch (libxl__device_model_version_running(gc, domid)) {
> > + case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL: {
> > + char *path = NULL;
> > + path = libxl__sprintf(gc,
> "/local/domain/0/device-model/%d/command",
> > + domid);
> > + libxl__xs_write(gc, XBT_NULL, path, "save");
> > + libxl__wait_for_device_model(gc, domid, "paused", NULL, NULL,
> NULL);
>
> This and libxl__remus_domain_resume_qemu, qemu_pci_add_xenstore,
> qemu_pci_remove_xenstore, libxl__domain_save_device_model and
> libxl_domain_unpause would probably benefit from a helper function to
> send a command to a traditional qemu.
>
> > + break;
> > + }
> > + case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN:
> > + /* Stop QEMU */
> > + if (libxl__qmp_stop(gc, domid))
> > + return ERROR_FAIL;
> > + break;
> > + default:
> > + return ERROR_INVAL;
> > + }
> > +
> > + return 0;
> > +}
> > +
> > +static int libxl__remus_domain_resume_qemu(libxl__gc *gc, uint32_t
> domid)
> > +{
> > +
> > + switch (libxl__device_model_version_running(gc, domid)) {
> > + case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL: {
> > + char *path = NULL;
> > + path = libxl__sprintf(gc,
> "/local/domain/0/device-model/%d/command",
> > + domid);
> > + libxl__xs_write(gc, XBT_NULL, path, "continue");
> > + break;
> > + }
> > + case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN:
> > + /* Stop QEMU */
> > + if (libxl__qmp_resume(gc, domid))
> > + return ERROR_FAIL;
> > + break;
> > + default:
> > + return ERROR_INVAL;
> > + }
> > +
> > + return 0;
> > +}
> > +
> > int libxl__domain_suspend_common(libxl__gc *gc, uint32_t domid, int fd,
> > libxl_domain_type type,
> > int live, int debug)
> > @@ -620,7 +667,7 @@
> > return rc;
> > }
> >
> > -int libxl__domain_save_device_model(libxl__gc *gc, uint32_t domid, int
> fd)
> > +int libxl__domain_save_device_model(libxl__gc *gc, uint32_t domid, int
> fd, int remus)
> > {
> > libxl_ctx *ctx = libxl__gc_owner(gc);
> > int ret, fd2 = -1, c;
> > @@ -631,13 +678,12 @@
> >
> > switch (libxl__device_model_version_running(gc, domid)) {
> > case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL: {
> > - char *path = NULL;
> > - LIBXL__LOG(ctx, LIBXL__LOG_DEBUG,
> > - "Saving device model state to %s", filename);
> > - path = libxl__sprintf(gc,
> "/local/domain/0/device-model/%d/command",
> > - domid);
> > - libxl__xs_write(gc, XBT_NULL, path, "save");
> > - libxl__wait_for_device_model(gc, domid, "paused", NULL, NULL,
> NULL);
> > + /* For Remus,we issue suspend_qemu call separately */
>
> Why?
>
>
In remus, save and transmit device model phases are decoupled. This code
(sending the saved device model to the target) is executed after the domain
is
resumed.
The control flow in current Remus is like this:
1 suspend vm
1.1 suspend qemu (save command, that saves the device model to a file)
2 copy out dirty memory into a buffer
3 resume vm
3.1 resume qemu
4 send the data in the buffer
5 send the saved device model
(xc_domain_restore extracts this from the tailbuf)
> How does this interact with Stefano's XC_SAVEID_TOOLSTACK patches?
>
>
I couldnt find anything online by this name. Can you point me to the patch
series?
I think some sort of high level description of the control flow used by
> Remus and how/why it differs from normal save/restore would be useful
> for review.
>
> > + if (!remus) {
> > + c = libxl__remus_domain_suspend_qemu(gc, domid);
>
> It seems odd to call this function libxl__remus_FOO and the use it
> when !remus. The function doesn't look to be inherently specific to
> either remus or !remus anyhow.
>
> > + if (c)
> > + return c;
> > + }
> > break;
> > }
> > case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN:
> > @@ -668,8 +714,9 @@
> > qemu_state_len = st.st_size;
> > LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "Qemu state is %d bytes\n",
> qemu_state_len);
> >
> > - ret = libxl_write_exactly(ctx, fd, QEMU_SIGNATURE,
> strlen(QEMU_SIGNATURE),
> > - "saved-state file", "qemu signature");
> > + ret = libxl_write_exactly(ctx, fd, remus ? REMUS_QEMU_SIGNATURE :
> QEMU_SIGNATURE,
> > + remus ? strlen(REMUS_QEMU_SIGNATURE):
> strlen(QEMU_SIGNATURE),
> > + "saved-state file", "qemu signature");
>
> QEMU_SIGNATURE is "DeviceModelRecord0002" which I believe libxc treats
> identically to the Remus signature?
>
>
Thanks. Yep, the remus_signature is redundant.
> Again consideration of how this interacts with Stefano's patch would be
> useful. If possible we'd quite like to pull the qemu-restore our of
> xc_domain_restore for consistency with how xc_domain_save saves it, the
> current layering is quite mad.
>
> Ian.
>
>
[-- Attachment #1.2: Type: text/html, Size: 8908 bytes --]
[-- Attachment #2: Type: text/plain, Size: 138 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 2 of 3] tools/libxl: suspend/postflush/commit callbacks
2012-01-25 11:22 ` Ian Campbell
@ 2012-01-25 23:15 ` Shriram Rajagopalan
2012-01-26 9:23 ` Ian Campbell
0 siblings, 1 reply; 13+ messages in thread
From: Shriram Rajagopalan @ 2012-01-25 23:15 UTC (permalink / raw)
To: Ian Campbell
Cc: brendan@cs.ubc.ca, xen-devel@lists.xensource.com, Ian Jackson
[-- Attachment #1.1: Type: text/plain, Size: 2236 bytes --]
On Wed, Jan 25, 2012 at 3:22 AM, Ian Campbell <Ian.Campbell@citrix.com>wrote:
> On Mon, 2012-01-23 at 22:46 +0000, rshriram@cs.ubc.ca wrote:
> > # HG changeset patch
> > # User Shriram Rajagopalan <rshriram@cs.ubc.ca>
> > # Date 1327358640 28800
> > # Node ID 0446591bee86eb4e767d75b70c885549c7a3cfef
> > # Parent 11fb1dfda7de4d6759dec87d80cd16cf137f7369
> > tools/libxl: suspend/postflush/commit callbacks
> >
> > * Add libxl callbacks for Remus checkpoint suspend, postflush (aka
> resume)
> > and checkpoint commit callbacks.
> > * suspend callback just bounces off
> libxl__domain_suspend_common_callback
> > * resume callback directly calls xc_domain_resume instead of re-using
> > libxl_domain_resume (as the latter does not set the fast suspend
> argument
> > to xc_domain_resume - aka suspend_cancel support).
> > * commit callback just sleeps for the checkpoint interval (for the
> moment).
> >
> > * Future patches will augument these callbacks with more
> functionalities like
> > issuing network buffer plug/unplug commands, disk checkpoint
> commands, etc.
> >
> > Signed-off-by: Shriram Rajagopalan <rshriram@cs.ubc.ca>
> >
>
>
> > +static int libxl__remus_domain_resume_callback(void *data)
> > +{
> > + struct suspendinfo *si = data;
> > + libxl_ctx *ctx = libxl__gc_owner(si->gc);
> > +
> > + /* Assumes that SUSPEND_CANCEL is available - just like
> > + * xm version of Remus. Without that, remus wont work.
> > + * Since libxl_domain_resume calls xc_domain_resume with
> > + * fast_suspend = 0, we cant re-use that api.
>
> You could modify that API which would be better than duplicating its
> content. I think the "fast" flag is useful to expose to users of libxl
> but if not then you could refactor into an internal function which takes
> the flag and call it from both here and libxl_domain_resume.
>
>
I didnt want to touch any existing public interfaces, structures, etc,
especially
something so common like domain_resume. While users of xl resume (or
unpause)
wont see any difference, other libraries using the libxl API might be
affected.
But I am in favor of "exposing" the flag instead of hiding it in an
internal function,
if there are no objections :).
shriram
[-- Attachment #1.2: Type: text/html, Size: 2963 bytes --]
[-- Attachment #2: Type: text/plain, Size: 138 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 3 of 3] tools/libxl: xl remus/remus-receive commands
2012-01-25 11:52 ` Ian Campbell
@ 2012-01-26 0:09 ` Shriram Rajagopalan
2012-01-26 10:37 ` Ian Campbell
0 siblings, 1 reply; 13+ messages in thread
From: Shriram Rajagopalan @ 2012-01-26 0:09 UTC (permalink / raw)
To: Ian Campbell
Cc: brendan@cs.ubc.ca, xen-devel@lists.xensource.com, Ian Jackson
[-- Attachment #1.1: Type: text/plain, Size: 8676 bytes --]
On Wed, Jan 25, 2012 at 3:52 AM, Ian Campbell <Ian.Campbell@citrix.com>wrote:
> >
>
> > diff -r 0446591bee86 -r 822536df4aec tools/libxl/xl_cmdimpl.c
> > --- a/tools/libxl/xl_cmdimpl.c Mon Jan 23 14:44:00 2012 -0800
> > +++ b/tools/libxl/xl_cmdimpl.c Mon Jan 23 14:44:02 2012 -0800
> > @@ -1814,7 +1814,7 @@
> > * If we have daemonized then do not return to the caller -- this
> has
> > * already happened in the parent.
> > */
> > - if ( !need_daemon )
> > + if ( daemonize && !need_daemon )
>
> What is this change for?
>
>
Sorry my bad. I should have sent it out as a separate patch. Well, if you
look at the control flow in the create_domain function , you would realize
that this is a bug.
create_domain()
{
int daemonize = dom_info->daemonize;
....
int need_daemon = daemonize;
restore_domain() or create_new()
if (!daemonize && !monitor) goto out;
if (need_daemon) {
fork()
daemon()..
need_daemon = 0;
}
...
out:
/* If we have daemonized then do not return to the caller -- this
has
* already happened in the parent.
*/
if ( !need_daemon )
exit()
}
So, even if one explicitly sets daemonize to 0, the function will never
return to the
caller. I needed it to return to the caller (remus_receiver() ), to take
further actions.
> exit(ret);
> >
> > return ret;
> > @@ -5853,6 +5853,175 @@
> > return ret;
> > }
> >
> > +int main_remus_receive(int argc, char **argv)
>
> Can this not be done as a flag to migrate-receive? Likewise is there
> common code between the remus migrate and regular migration?
>
>
It can but it would basically terminate after the create_domain() call in
the
migrate_receive function. I would have to have something like
if (remus) {
dont wait for permission to start domain
try renaming domain
unpause domain
return
}
and a bunch of if (remus) flags peppered around the migrate receive
function.
Having it in a separate function allows me to add support for creating a TCP
channel that receives the checkpoints, add hooks or call external
libraries/binaries
that allows the user to check for split-brain before resuming the domain.
Is there any reason that remus would not benefit from the xl migration
> protocol preamble?
>
>
No specific reason.
> +{
> > + int rc;
> > + char *migration_domname;
> > + struct domain_create dom_info;
> > +
> > + signal(SIGPIPE, SIG_IGN);
> > + memset(&dom_info, 0, sizeof(dom_info));
> > + dom_info.debug = 1;
> > + dom_info.no_incr_generationid = 1;
> > + dom_info.restore_file = "incoming checkpoint stream";
> > + dom_info.migrate_fd = 0; /* stdin - will change in future*/
> > + dom_info.migration_domname_r = &migration_domname;
> > +
> > + rc = create_domain(&dom_info);
>
> I'm totally failing to see where the Remus'ness of this create_domain
> comes from.
>
>
dom_info.daemonize = 0, .monitor = 0 and .paused = 0;
As I said earlier, this part is probably the only duplication between
migrate-receive
and remus-receive. With Xend, there was no separate remus receiver, to
control
what happens on the backup host. Now that I have an opportunity to
incorporate
remus into the toolstack from start, I thought I will keep the remus
control flow as separate as possible
and not have to worry about breaking live migration (nor complicate its
code).
> + if (rc < 0) {
> > + fprintf(stderr, "migration target (Remus): Domain creation
> failed"
> > + " (code %d) domid %u.\n", rc, domid);
> > + exit(-rc);
> > + }
> > +
> > + /* If we are here, it means that the sender (primary) has crashed.
> > + * If domain renaming fails, lets just continue (as we need the
> domain
> > + * to be up & dom names may not matter much, as long as its
> reachable
> > + * over network).
> > + *
> > + * If domain unpausing fails, destroy domain ? Or is it better to
> have
> > + * a consistent copy of the domain (memory, cpu state, disk)
> > + * on atleast one physical host ? Right now, lets just leave the
> domain
>
> at least
>
> > + * as is and let the Administrator decide (or troubleshoot).
> > + */
> > + fprintf(stderr, "migration target: Remus Failover for domain %u\n",
> domid);
> > + if (migration_domname) {
> > + rc = libxl_domain_rename(ctx, domid, migration_domname,
> > + common_domname);
> > + if (rc)
> > + fprintf(stderr, "migration target (Remus): "
> > + "Failed to rename domain from %s to %s:%d\n",
> > + migration_domname, common_domname, rc);
> > +
> > + rc = libxl_domain_unpause(ctx, domid);
> > + if (rc)
> > + fprintf(stderr, "migration target (Remus): "
> > + "Failed to unpause domain %s (id: %u):%d\n",
> > + common_domname, domid, rc);
> > + }
> > +
> > + return -rc;
> > +}
> > +
> > +int main_remus(int argc, char **argv)
> > +{
> > + int opt, rc;
> > + const char *ssh_command = "ssh";
> > + char *host = NULL, *rune = NULL, *domain = NULL;
> > +
> > + int sendpipe[2], recvpipe[2];
> > + int send_fd = -1, recv_fd = -1;
> > + pid_t child = -1;
> > +
> > + uint8_t *config_data;
> > + int config_len;
> > +
> > + libxl_domain_remus_info r_info;
> > +
> > + memset(&r_info, 0, sizeof(libxl_domain_remus_info));
> > + /* Defaults */
> > + r_info.interval = 200;
> > + r_info.blackhole = 0;
> > + r_info.compression = 1;
> > +
> > + while ((opt = def_getopt(argc, argv, "bui:s:", "remus", 2)) != -1) {
>
> main_migrate handles a bunch of other options which might also be
> interesting in the Remus case? Better would be to add all this as an
> option to the std migrate.
>
Negative. It would look totally unintuitive to have a checkpoint interval
option (or blackhole
replication option) in a live migration command. To an end user, remus is
just a HA system
and live migration is for moving VMs around. What is the point in trying to
educate him/her
that remus is basically "continuous live migration" ?
Imagine a command like
"xl migrate --R --i 100 --N Guest Backup"
and help options like
"-R enable remus HA
-i remus checkpoint interval
-N disable network buffering in Remus"
To you and me, as devs, it may not matter. But to common users who are
mostly going to
use just live migration, this would be utterly confusing.
> > + switch (opt) {
> > + case 0: case 2:
> > + return opt;
> > +
> > + case 'i':
> > + r_info.interval = atoi(optarg);
> > + break;
> > + case 'b':
> > + r_info.blackhole = 1;
> > + break;
> > + case 'u':
> > + r_info.compression = 0;
> > + break;
> > + case 's':
> > + ssh_command = optarg;
> > + break;
> > + }
> > + }
> > +
> > + domain = argv[optind];
> > + host = argv[optind + 1];
> > +
> > + if (r_info.blackhole) {
> > + find_domain(domain);
> > + send_fd = open("/dev/null", O_RDWR, 0644);
> > + if (send_fd < 0) {
> > + perror("failed to open /dev/null");
> > + exit(-1);
> > + }
> > + } else {
>
> The following duplicates an awful lot of main_migrate which begs for
> them to either be the same underlying command or at least for some
> refactoring.
>
>
I agree.
> > +
> > + if (!ssh_command[0]) {
> > + rune = host;
> > + } else {
> > + if (asprintf(&rune, "exec %s %s xl remus-receive",
> > + ssh_command, host) < 0)
> > + return 1;
> > + }
> > +
> > + save_domain_core_begin(domain, NULL, &config_data, &config_len);
> > +
> > + if (!config_len) {
> > + fprintf(stderr, "No config file stored for running domain
> and "
> > + "none supplied - cannot start remus.\n");
> > + exit(1);
> > + }
> > +
> > + MUST( libxl_pipe(ctx, sendpipe) );
> > + MUST( libxl_pipe(ctx, recvpipe) );
> > +
> > + child = libxl_fork(ctx);
> > + if (child==-1) exit(1);
> > +
> > + /* TODO: change this to plain TCP socket based channel
> > + * instead of SSH.
>
> There are good reasons for using ssh though. However the user can supply
> another command which is not encrypted if they want to.
>
Whatever happens here the same arguments would apply to non-remus
> migration so the changes should be done for both (or better the code
> should be made more common.
>
> Ian.
>
>
>
[-- Attachment #1.2: Type: text/html, Size: 11875 bytes --]
[-- Attachment #2: Type: text/plain, Size: 138 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 1 of 3] tools/libxl: QEMU device model suspend/resume
2012-01-25 23:07 ` Shriram Rajagopalan
@ 2012-01-26 9:18 ` Ian Campbell
0 siblings, 0 replies; 13+ messages in thread
From: Ian Campbell @ 2012-01-26 9:18 UTC (permalink / raw)
To: rshriram@cs.ubc.ca
Cc: brendan@cs.ubc.ca, xen-devel@lists.xensource.com, Ian Jackson
On Wed, 2012-01-25 at 23:07 +0000, Shriram Rajagopalan wrote:
> On Wed, Jan 25, 2012 at 3:08 AM, Ian Campbell
> > - libxl__xs_write(gc, XBT_NULL, path, "save");
> > - libxl__wait_for_device_model(gc, domid, "paused",
> NULL, NULL, NULL);
> > + /* For Remus,we issue suspend_qemu call separately
> */
>
>
> Why?
>
>
> In remus, save and transmit device model phases are decoupled. This
> code (sending the saved device model to the target) is executed after
> the domain is resumed.
>
> The control flow in current Remus is like this:
> 1 suspend vm
> 1.1 suspend qemu (save command, that saves the device model to a
> file)
> 2 copy out dirty memory into a buffer
> 3 resume vm
> 3.1 resume qemu
> 4 send the data in the buffer
> 5 send the saved device model
> (xc_domain_restore extracts this from the tailbuf)
Is this basic flow incompatible with regular save? Isn't it equivalent
to to omitting phase 3? (I know regular save is structured a bit
different right now, but it could be done this way to minimise the
amount of Remus specific code).
Which "struct save_callbacks" members do each of these stages correspond
to?
I think a high level overview of the ordering of operations and the
usage of these hooks would make an ideal comment just above the
definition of "struct save_callbacks", what do you think?
> How does this interact with Stefano's XC_SAVEID_TOOLSTACK
> patches?
>
>
> I couldnt find anything online by this name. Can you point me to the
> patch series?
"libxl: save/restore qemu physmap" posted last Friday.
We would also like to see the qemu restore done in a callback hook for
symmetry with the save hook.
Ian.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 2 of 3] tools/libxl: suspend/postflush/commit callbacks
2012-01-25 23:15 ` Shriram Rajagopalan
@ 2012-01-26 9:23 ` Ian Campbell
0 siblings, 0 replies; 13+ messages in thread
From: Ian Campbell @ 2012-01-26 9:23 UTC (permalink / raw)
To: rshriram@cs.ubc.ca
Cc: brendan@cs.ubc.ca, xen-devel@lists.xensource.com, Ian Jackson
On Wed, 2012-01-25 at 23:15 +0000, Shriram Rajagopalan wrote:
> On Wed, Jan 25, 2012 at 3:22 AM, Ian Campbell
> <Ian.Campbell@citrix.com> wrote:
> On Mon, 2012-01-23 at 22:46 +0000, rshriram@cs.ubc.ca wrote:
> > # HG changeset patch
> > # User Shriram Rajagopalan <rshriram@cs.ubc.ca>
> > # Date 1327358640 28800
> > # Node ID 0446591bee86eb4e767d75b70c885549c7a3cfef
> > # Parent 11fb1dfda7de4d6759dec87d80cd16cf137f7369
> > tools/libxl: suspend/postflush/commit callbacks
> >
> > * Add libxl callbacks for Remus checkpoint suspend,
> postflush (aka resume)
> > and checkpoint commit callbacks.
> > * suspend callback just bounces off
> libxl__domain_suspend_common_callback
> > * resume callback directly calls xc_domain_resume instead
> of re-using
> > libxl_domain_resume (as the latter does not set the fast
> suspend argument
> > to xc_domain_resume - aka suspend_cancel support).
> > * commit callback just sleeps for the checkpoint interval
> (for the moment).
> >
> > * Future patches will augument these callbacks with more
> functionalities like
> > issuing network buffer plug/unplug commands, disk
> checkpoint commands, etc.
> >
> > Signed-off-by: Shriram Rajagopalan <rshriram@cs.ubc.ca>
> >
>
>
>
> > +static int libxl__remus_domain_resume_callback(void *data)
> > +{
> > + struct suspendinfo *si = data;
> > + libxl_ctx *ctx = libxl__gc_owner(si->gc);
> > +
> > + /* Assumes that SUSPEND_CANCEL is available - just like
> > + * xm version of Remus. Without that, remus wont work.
> > + * Since libxl_domain_resume calls xc_domain_resume
> with
> > + * fast_suspend = 0, we cant re-use that api.
>
>
> You could modify that API which would be better than
> duplicating its
> content. I think the "fast" flag is useful to expose to users
> of libxl
> but if not then you could refactor into an internal function
> which takes
> the flag and call it from both here and libxl_domain_resume.
>
>
>
> I didnt want to touch any existing public interfaces, structures, etc,
> especially something so common like domain_resume.
Until Xen 4.2 it is fine to improve the public interface of libxl,
including adding a new parameter here if you think that is best.
> While users of xl resume (or unpause) wont see any difference, other
> libraries using the libxl API might be affected.
So long as they can still ask for the current behaviour that is fine.
> But I am in favor of "exposing" the flag instead of hiding it in an
> internal function, if there are no objections :).
None.
Ian.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 3 of 3] tools/libxl: xl remus/remus-receive commands
2012-01-26 0:09 ` Shriram Rajagopalan
@ 2012-01-26 10:37 ` Ian Campbell
0 siblings, 0 replies; 13+ messages in thread
From: Ian Campbell @ 2012-01-26 10:37 UTC (permalink / raw)
To: rshriram@cs.ubc.ca
Cc: brendan@cs.ubc.ca, xen-devel@lists.xensource.com, Ian Jackson
On Thu, 2012-01-26 at 00:09 +0000, Shriram Rajagopalan wrote:
> On Wed, Jan 25, 2012 at 3:52 AM, Ian Campbell
>
> Sorry my bad. I should have sent it out as a separate patch. Well, if
> you look at the control flow in the create_domain function , you would
> realize that this is a bug.
>
> create_domain()
> {
> int daemonize = dom_info->daemonize;
> ....
> int need_daemon = daemonize;
>
> restore_domain() or create_new()
>
> if (!daemonize && !monitor) goto out;
>
> if (need_daemon) {
> fork()
> daemon()..
> need_daemon = 0;
> }
> ...
> out:
> /* If we have daemonized then do not return to the caller -- this has
> * already happened in the parent.
> */
> if ( !need_daemon )
> exit()
>
> }
>
>
> So, even if one explicitly sets daemonize to 0, the function will never return to the
> caller. I needed it to return to the caller (remus_receiver() ), to take further actions.
Makes sense.
Although the default for remus-receive should be to daemonize otherwise
when the domain is resumed you won't get any automatic reboot/shutdown
handling etc.
> > exit(ret);
> >
> > return ret;
> > @@ -5853,6 +5853,175 @@
> > return ret;
> > }
> >
> > +int main_remus_receive(int argc, char **argv)
>
>
> Can this not be done as a flag to migrate-receive? Likewise is
> there common code between the remus migrate and regular
> migration?
>
>
> It can but it would basically terminate after the create_domain() call
> in the migrate_receive function. I would have to have something like
> if (remus) {
> dont wait for permission to start domain
> try renaming domain
> unpause domain
> return
> }
I think actually it becomes
if (!remus)
wait for permission to start domain
try renaming domain
unpause domain
However I take your point that there isn't that much in common after
create_domain once you remove the post migration interlock protocol.
> and a bunch of if (remus) flags peppered around the migrate receive
> function.
>
> Having it in a separate function allows me to add support for creating
> a TCP channel that receives the checkpoints,
If this is useful for Remus then I can't see any reason it wouldn't also
be useful for regular migration.
> This add hooks or call external libraries/binaries that allows the
> user to check for split-brain before resuming the domain.
Hooks are OK, if they are not used by standard migration they can be
NULL. They should of course be properly documented...
> > +{
> > + int rc;
> > + char *migration_domname;
> > + struct domain_create dom_info;
> > +
> > + signal(SIGPIPE, SIG_IGN);
> > + memset(&dom_info, 0, sizeof(dom_info));
> > + dom_info.debug = 1;
BTW, this should be an option.
> > + dom_info.no_incr_generationid = 1;
> > + dom_info.restore_file = "incoming checkpoint stream";
> > + dom_info.migrate_fd = 0; /* stdin - will change in
> future*/
> > + dom_info.migration_domname_r = &migration_domname;
> > +
> > + rc = create_domain(&dom_info);
>
>
> I'm totally failing to see where the Remus'ness of this
> create_domain comes from.
> dom_info.daemonize = 0, .monitor = 0 and .paused = 0;
And how do those options combine to == Remus?
migrate doesn't currently have a "paused" option but it is not beyond
the realms of possibility that such a thing might be desirable in the
future.
> As I said earlier, this part is probably the only duplication between
> migrate-receive and remus-receive. With Xend, there was no separate
> remus receiver, to control what happens on the backup host. Now that I
> have an opportunity to incorporate remus into the toolstack from
> start, I thought I will keep the remus control flow as separate as
> possible and not have to worry about breaking live migration (nor
> complicate its code).
I think this is the wrong approach and motivation. We want to combine
the two as much as possible to reduce the amount of Remus specific code
which requires maintenance. I don't think supporting Remus via the
migration code paths will unduly complicate things.
[...]
> > + while ((opt = def_getopt(argc, argv, "bui:s:", "remus",
> 2)) != -1) {
>
>
> main_migrate handles a bunch of other options which might also
> be
> interesting in the Remus case? Better would be to add all this
> as an option to the std migrate.
>
>
> Negative. It would look totally unintuitive to have a checkpoint
> interval option (or blackhole replication option) in a live migration
> command. To an end user, remus is just a HA system and live migration
> is for moving VMs around. What is the point in trying to educate
> him/her that remus is basically "continuous live migration" ?
You've convinced me on the user interface point.
However I think in terms of implementation we should aim for as much
common code as possible.
Likewise even if xl remus and xl migrate are separate commands they
should support the same common options where appropriate (e.g. -F,-d,-e,
possible -C etc).
I'm undecided where *-receive (which is not user visible) should be
separate or not -- this is the sort of thing which could in principle be
negotiated as part of the protocol. Based on the above it's not clear
how much code sharing there would actually be. I expect we can revisit
this once the bits which can should be shared actually are.
Ian.
^ permalink raw reply [flat|nested] 13+ messages in thread
end of thread, other threads:[~2012-01-26 10:37 UTC | newest]
Thread overview: 13+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-01-23 22:46 [PATCH 0 of 3] Remus - libxl support rshriram
2012-01-23 22:46 ` [PATCH 1 of 3] tools/libxl: QEMU device model suspend/resume rshriram
2012-01-25 11:08 ` Ian Campbell
2012-01-25 23:07 ` Shriram Rajagopalan
2012-01-26 9:18 ` Ian Campbell
2012-01-23 22:46 ` [PATCH 2 of 3] tools/libxl: suspend/postflush/commit callbacks rshriram
2012-01-25 11:22 ` Ian Campbell
2012-01-25 23:15 ` Shriram Rajagopalan
2012-01-26 9:23 ` Ian Campbell
2012-01-23 22:46 ` [PATCH 3 of 3] tools/libxl: xl remus/remus-receive commands rshriram
2012-01-25 11:52 ` Ian Campbell
2012-01-26 0:09 ` Shriram Rajagopalan
2012-01-26 10:37 ` Ian Campbell
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).