From: Ian Jackson <ian.jackson@eu.citrix.com>
To: xen-devel@lists.xen.org
Cc: Ian Jackson <ian.jackson@eu.citrix.com>
Subject: [PATCH 08/15] libxl: wait for qemu to acknowledge logdirty command
Date: Wed, 30 May 2012 17:16:39 +0100 [thread overview]
Message-ID: <1338394606-22693-9-git-send-email-ian.jackson@eu.citrix.com> (raw)
In-Reply-To: <1338394606-22693-1-git-send-email-ian.jackson@eu.citrix.com>
The current migration code in libxl instructs qemu to start or stop
logdirty, but it does not wait for an acknowledgement from qemu before
continuing. This might lead to memory corruption (!)
Fix this by waiting for qemu to acknowledge the command.
Unfortunately the necessary ao arrangements for waiting for this
command are unique because qemu has a special protocol for this
particular operation.
Also, this change means that the switch_qemu_logdirty callback
implementation in libxl can no longer synchronously produce its return
value, as it now needs to wait for xenstore. So we tell the
marshalling code generator that it is a message which does not need a
reply. This turns the callback function called by the marshaller into
one which returns void; the callback function arranges to later
explicitly sends the reply to the helper, when the xs watch triggers
and the appropriate value is read from xenstore.
Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com>
---
tools/libxl/libxl_dom.c | 176 +++++++++++++++++++++++++++++++++---
tools/libxl/libxl_internal.h | 18 ++++-
tools/libxl/libxl_save_callout.c | 8 ++
tools/libxl/libxl_save_msgs_gen.pl | 7 +-
4 files changed, 193 insertions(+), 16 deletions(-)
diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index 3e89dd7..b13ed51 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -526,30 +526,180 @@ int libxl__toolstack_restore(uint32_t domid, const uint8_t *buf,
static void domain_suspend_done(libxl__egc *egc,
libxl__domain_suspend_state *dss, int rc);
-/*----- callbacks, called by xc_domain_save -----*/
+/*----- complicated callback, called by xc_domain_save -----*/
+
+/*
+ * We implement the other end of protocol for controlling qemu-dm's
+ * logdirty. There is no documentation for this protocol, but our
+ * counterparty's implementation is in
+ * qemu-xen-traditional.git:xenstore.c in the function
+ * xenstore_process_logdirty_event
+ */
+
+static void switch_logdirty_timeout(libxl__egc *egc, libxl__ev_time *ev,
+ const struct timeval *requested_abs);
+static void switch_logdirty_xswatch(libxl__egc *egc, libxl__ev_xswatch*,
+ const char *watch_path, const char *event_path);
+static void switch_logdirty_done(libxl__egc *egc,
+ libxl__domain_suspend_state *dss, int ok);
-int libxl__domain_suspend_common_switch_qemu_logdirty
+static void logdirty_init(libxl__logdirty_switch *lds)
+{
+ lds->cmd_path = 0;
+ libxl__ev_xswatch_init(&lds->watch);
+ libxl__ev_time_init(&lds->timeout);
+}
+
+void libxl__domain_suspend_common_switch_qemu_logdirty
(int domid, unsigned enable, void *user)
{
libxl__save_helper_state *shs = user;
+ libxl__egc *egc = shs->egc;
libxl__domain_suspend_state *dss = CONTAINER_OF(shs, *dss, shs);
+ libxl__logdirty_switch *lds = &dss->logdirty;
STATE_AO_GC(dss->ao);
- char *path;
- bool rc;
+ int rc;
+ xs_transaction_t t = 0;
+ const char *got;
- path = libxl__sprintf(gc,
+ if (!lds->cmd_path) {
+ lds->cmd_path = GCSPRINTF(
"/local/domain/0/device-model/%u/logdirty/cmd", domid);
- if (!path)
- return 1;
+ lds->ret_path = GCSPRINTF(
+ "/local/domain/0/device-model/%u/logdirty/ret", domid);
+ }
+ lds->cmd = enable ? "enable" : "disable";
- if (enable)
- rc = xs_write(CTX->xsh, XBT_NULL, path, "enable", strlen("enable"));
- else
- rc = xs_write(CTX->xsh, XBT_NULL, path, "disable", strlen("disable"));
+ rc = libxl__ev_xswatch_register(gc, &lds->watch,
+ switch_logdirty_xswatch, lds->ret_path);
+ if (rc) goto out;
+
+ rc = libxl__ev_time_register_rel(gc, &lds->timeout,
+ switch_logdirty_timeout, 10*1000);
+ if (rc) goto out;
+
+ for (;;) {
+ rc = libxl__xs_transaction_start(gc, &t);
+ if (rc) goto out;
+
+ rc = libxl__xs_read_checked(gc, t, lds->cmd_path, &got);
+ if (rc) goto out;
+
+ if (got) {
+ const char *got_ret;
+ rc = libxl__xs_read_checked(gc, t, lds->ret_path, &got_ret);
+ if (rc) goto out;
- return rc ? 0 : 1;
+ if (strcmp(got, got_ret)) {
+ LOG(ERROR,"controlling logdirty: qemu was already sent"
+ " command `%s' (xenstore path `%s') but result is `%s'",
+ got, lds->cmd_path, got_ret ? got_ret : "<none>");
+ rc = ERROR_FAIL;
+ goto out;
+ }
+ rc = libxl__xs_rm_checked(gc, t, lds->cmd_path);
+ if (rc) goto out;
+ }
+
+ rc = libxl__xs_rm_checked(gc, t, lds->ret_path);
+ if (rc) goto out;
+
+ rc = libxl__xs_write_checked(gc, t, lds->cmd_path, lds->cmd);
+ if (rc) goto out;
+
+ rc = libxl__xs_transaction_commit(gc, &t);
+ if (!rc) break;
+ if (rc<0) goto out;
+ }
+
+ /* OK, wait for some callback */
+ return;
+
+ out:
+ LOG(ERROR,"logdirty switch failed (rc=%d), aborting suspend",rc);
+ switch_logdirty_done(egc,dss,0);
+}
+
+static void switch_logdirty_timeout(libxl__egc *egc, libxl__ev_time *ev,
+ const struct timeval *requested_abs)
+{
+ libxl__domain_suspend_state *dss = CONTAINER_OF(ev, *dss, logdirty.timeout);
+ STATE_AO_GC(dss->ao);
+ LOG(ERROR,"logdirty switch: wait for device model timed out");
+ switch_logdirty_done(egc,dss,0);
}
+static void switch_logdirty_xswatch(libxl__egc *egc, libxl__ev_xswatch *watch,
+ const char *watch_path, const char *event_path)
+{
+ libxl__domain_suspend_state *dss =
+ CONTAINER_OF(watch, *dss, logdirty.watch);
+ libxl__logdirty_switch *lds = &dss->logdirty;
+ STATE_AO_GC(dss->ao);
+ const char *got;
+ xs_transaction_t t = 0;
+ int rc;
+
+ for (;;) {
+ rc = libxl__xs_transaction_start(gc, &t);
+ if (rc) goto out;
+
+ rc = libxl__xs_read_checked(gc, t, lds->ret_path, &got);
+ if (rc) goto out;
+
+ if (!got) {
+ rc = +1;
+ goto out;
+ }
+
+ if (strcmp(got, lds->cmd)) {
+ LOG(ERROR,"logdirty switch: sent command `%s' but got reply `%s'"
+ " (xenstore paths `%s' / `%s')", lds->cmd, got,
+ lds->cmd_path, lds->ret_path);
+ rc = ERROR_FAIL;
+ goto out;
+ }
+
+ rc = libxl__xs_rm_checked(gc, t, lds->cmd_path);
+ if (rc) goto out;
+
+ rc = libxl__xs_rm_checked(gc, t, lds->ret_path);
+ if (rc) goto out;
+
+ rc = libxl__xs_transaction_commit(gc, &t);
+ if (!rc) break;
+ if (rc<0) goto out;
+ }
+
+ out:
+ /* rc < 0: error
+ * rc == 0: ok, we are done
+ * rc == +1: need to keep waiting
+ */
+ libxl__xs_transaction_abort(gc, &t);
+
+ if (!rc) {
+ switch_logdirty_done(egc,dss,1);
+ } else if (rc < 0) {
+ LOG(ERROR,"logdirty switch: response read failed (rc=%d)",rc);
+ switch_logdirty_done(egc,dss,0);
+ }
+}
+
+static void switch_logdirty_done(libxl__egc *egc,
+ libxl__domain_suspend_state *dss, int ok)
+{
+ STATE_AO_GC(dss->ao);
+ libxl__logdirty_switch *lds = &dss->logdirty;
+
+ libxl__ev_xswatch_deregister(gc, &lds->watch);
+ libxl__ev_time_deregister(gc, &lds->timeout);
+
+ libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, ok);
+}
+
+/*----- callbacks, called by xc_domain_save -----*/
+
int libxl__domain_suspend_device_model(libxl__gc *gc, uint32_t domid)
{
libxl_ctx *ctx = libxl__gc_owner(gc);
@@ -872,6 +1022,8 @@ void libxl__domain_suspend(libxl__egc *egc, libxl__domain_suspend_state *dss)
libxl__srm_save_autogen_callbacks *const callbacks =
&dss->shs.callbacks.save.a;
+ logdirty_init(&dss->logdirty);
+
switch (type) {
case LIBXL_DOMAIN_TYPE_HVM: {
char *path;
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index ee7f66a..8f4f45d 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -1864,6 +1864,14 @@ typedef struct libxl__domain_suspend_state libxl__domain_suspend_state;
typedef void libxl__domain_suspend_cb(libxl__egc*,
libxl__domain_suspend_state*, int rc);
+typedef struct libxl__logdirty_switch {
+ const char *cmd;
+ const char *cmd_path;
+ const char *ret_path;
+ libxl__ev_xswatch watch;
+ libxl__ev_time timeout;
+} libxl__logdirty_switch;
+
struct libxl__domain_suspend_state {
/* set by caller of libxl__domain_suspend */
libxl__ao *ao;
@@ -1882,6 +1890,7 @@ struct libxl__domain_suspend_state {
int guest_responded;
int interval; /* checkpoint interval (for Remus) */
libxl__save_helper_state shs;
+ libxl__logdirty_switch logdirty;
};
@@ -2013,8 +2022,15 @@ _hidden void libxl__xc_domain_save(libxl__egc*, libxl__domain_suspend_state*,
_hidden void libxl__xc_domain_save_done(libxl__egc*, void *dss_void,
int rc, int retval, int errnoval);
+/* Used by asynchronous callbacks: ie ones which xc regards as
+ * returning a value, but which we want to handle asynchronously.
+ * Such functions' actual callback function return void in libxl
+ * When they are ready to indicate completion, they call this. */
+void libxl__xc_domain_saverestore_async_callback_done(libxl__egc *egc,
+ libxl__save_helper_state *shs, int return_value);
+
_hidden int libxl__domain_suspend_common_callback(void *data);
-_hidden int libxl__domain_suspend_common_switch_qemu_logdirty
+_hidden void libxl__domain_suspend_common_switch_qemu_logdirty
(int domid, unsigned int enable, void *data);
_hidden int libxl__toolstack_save(uint32_t domid, uint8_t **buf,
uint32_t *len, void *data);
diff --git a/tools/libxl/libxl_save_callout.c b/tools/libxl/libxl_save_callout.c
index ff7dfb6..4413e97 100644
--- a/tools/libxl/libxl_save_callout.c
+++ b/tools/libxl/libxl_save_callout.c
@@ -129,6 +129,14 @@ void libxl__xc_domain_save(libxl__egc *egc, libxl__domain_suspend_state *dss,
}
+void libxl__xc_domain_saverestore_async_callback_done(libxl__egc *egc,
+ libxl__save_helper_state *shs, int return_value)
+{
+ shs->egc = egc;
+ libxl__srm_callout_sendreply(return_value, shs);
+ shs->egc = 0;
+}
+
/*----- helper execution -----*/
static void run_helper(libxl__egc *egc, libxl__save_helper_state *shs,
diff --git a/tools/libxl/libxl_save_msgs_gen.pl b/tools/libxl/libxl_save_msgs_gen.pl
index cd0837e..8832c46 100755
--- a/tools/libxl/libxl_save_msgs_gen.pl
+++ b/tools/libxl/libxl_save_msgs_gen.pl
@@ -14,6 +14,7 @@ our @msgs = (
# x - function pointer is in struct {save,restore}_callbacks
# and its null-ness needs to be passed through to the helper's xc
# W - needs a return value; callback is synchronous
+ # A - needs a return value; callback is asynchronous
[ 1, 'sr', "log", [qw(uint32_t level
uint32_t errnoval
STRING context
@@ -25,7 +26,7 @@ our @msgs = (
[ 3, 'scxW', "suspend", [] ],
[ 4, 'scxW', "postcopy", [] ],
[ 5, 'scxW', "checkpoint", [] ],
- [ 6, 'scxW', "switch_qemu_logdirty", [qw(int domid
+ [ 6, 'scxA', "switch_qemu_logdirty", [qw(int domid
unsigned enable)] ],
# toolstack_save done entirely `by hand'
[ 7, 'rcxW', "toolstack_restore", [qw(uint32_t domid
@@ -260,7 +261,7 @@ foreach my $msginfo (@msgs) {
$f_more_sr->(" int r;\n");
}
- my $c_rtype_helper = $flags =~ m/W/ ? 'int' : 'void';
+ my $c_rtype_helper = $flags =~ m/[WA]/ ? 'int' : 'void';
my $c_rtype_callout = $flags =~ m/W/ ? 'int' : 'void';
my $c_decl = '(';
my $c_callback_args = '';
@@ -348,7 +349,7 @@ END
assert(len == allocd);
${transmit}(buf, len, user);
");
- if ($flags =~ m/W/) {
+ if ($flags =~ m/[WA]/) {
f_more("${encode}_$name", (<<END.($debug ? <<END : '').<<END));
int r = ${helper}_getreply(user);
END
--
1.7.2.5
next prev parent reply other threads:[~2012-05-30 16:16 UTC|newest]
Thread overview: 33+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-05-30 16:16 [PATCH v2 00/15] libxl: domain save/restore: run in a separate process Ian Jackson
2012-05-30 16:16 ` [PATCH 01/15] libxc: xc_domain_restore, make toolstack_restore const-correct Ian Jackson
2012-05-30 17:26 ` Ian Campbell
2012-05-31 9:55 ` Ian Jackson
2012-05-31 11:16 ` Ian Campbell
2012-05-31 14:00 ` Ian Jackson
2012-05-30 16:16 ` [PATCH 02/15] libxl: domain save: rename variables etc Ian Jackson
2012-05-30 17:32 ` Ian Campbell
2012-05-31 10:02 ` Ian Jackson
2012-05-30 16:16 ` [PATCH 03/15] libxl: domain restore: reshuffle, preparing for ao Ian Jackson
2012-05-30 16:16 ` [PATCH 04/15] libxl: domain save: API changes for asynchrony Ian Jackson
2012-05-30 16:16 ` [PATCH 05/15] libxl: domain save/restore: run in a separate process Ian Jackson
2012-05-30 16:16 ` [PATCH 06/15] libxl: rename libxl_dom:save_helper to physmap_path Ian Jackson
2012-05-30 17:44 ` Ian Campbell
2012-05-30 16:16 ` [PATCH 07/15] libxl: provide libxl__xs_*_checked and libxl__xs_transaction_* Ian Jackson
2012-05-30 17:48 ` Ian Campbell
2012-05-31 10:22 ` Ian Jackson
2012-05-31 11:17 ` Ian Campbell
2012-05-30 16:16 ` Ian Jackson [this message]
2012-05-30 16:16 ` [PATCH 09/15] libxl: datacopier: provide "prefix data" facilit Ian Jackson
2012-05-31 7:52 ` Ian Campbell
2012-05-31 10:32 ` Ian Jackson
2012-05-31 11:20 ` Ian Campbell
2012-05-31 14:07 ` Ian Jackson
2012-05-30 16:16 ` [PATCH 10/15] libxl: prepare for asynchronous writing of qemu save file Ian Jackson
2012-05-30 16:16 ` [PATCH 11/15] libxl: Make libxl__domain_save_device_model asynchronous Ian Jackson
2012-05-30 16:16 ` [PATCH 12/15] xl: Handle return value from libxl_domain_suspend correctly Ian Jackson
2012-05-31 7:53 ` Ian Campbell
2012-05-30 16:16 ` [PATCH 13/15] libxl: do not leak dms->saved_state Ian Jackson
2012-05-31 7:54 ` Ian Campbell
2012-05-30 16:16 ` [PATCH 14/15] libxl: do not leak spawned middle children Ian Jackson
2012-05-30 16:16 ` [PATCH 15/15] libxl: do not leak an event struct on ignored ao progress Ian Jackson
2012-05-31 7:55 ` Ian Campbell
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1338394606-22693-9-git-send-email-ian.jackson@eu.citrix.com \
--to=ian.jackson@eu.citrix.com \
--cc=xen-devel@lists.xen.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).