xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
From: Ian Jackson <ian.jackson@eu.citrix.com>
To: xen-devel@lists.xen.org
Cc: Ian Jackson <ian.jackson@eu.citrix.com>
Subject: [PATCH 27/27] libxl: abort bootloader invocation when domain dies
Date: Fri, 11 May 2012 18:58:12 +0100	[thread overview]
Message-ID: <1336759092-2432-28-git-send-email-ian.jackson@eu.citrix.com> (raw)
In-Reply-To: <1336759092-2432-1-git-send-email-ian.jackson@eu.citrix.com>

Cancel the bootloader (specifically, by sending it a signal) if the
domain is seen to disappear from xenstore.

We use a new utility event source libxl__domaindeathcheck which
provides a convenient wrapper for the xenstore watch.

Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>

Changes since v8:
 * Fixed the commit message summary line.

Changes since v7:
 * Add a comment explaining why we use a watch on the domain's
   xenstore path rather than @releaseDomain.
 * Fix typo in error message.
---
 tools/libxl/libxl_bootloader.c |   43 ++++++++++++++++++++++++++++--------
 tools/libxl/libxl_event.c      |   47 ++++++++++++++++++++++++++++++++++++++++
 tools/libxl/libxl_internal.h   |   29 +++++++++++++++++++++++-
 3 files changed, 108 insertions(+), 11 deletions(-)

diff --git a/tools/libxl/libxl_bootloader.c b/tools/libxl/libxl_bootloader.c
index 8436c07..fb1302b 100644
--- a/tools/libxl/libxl_bootloader.c
+++ b/tools/libxl/libxl_bootloader.c
@@ -31,6 +31,7 @@ static void bootloader_keystrokes_copyfail(libxl__egc *egc,
        libxl__datacopier_state *dc, int onwrite, int errnoval);
 static void bootloader_display_copyfail(libxl__egc *egc,
        libxl__datacopier_state *dc, int onwrite, int errnoval);
+static void bootloader_domaindeath(libxl__egc*, libxl__domaindeathcheck *dc);
 static void bootloader_finished(libxl__egc *egc, libxl__ev_child *child,
                                 pid_t pid, int status);
 
@@ -213,6 +214,7 @@ void libxl__bootloader_init(libxl__bootloader_state *bl)
     bl->ptys[0].master = bl->ptys[0].slave = 0;
     bl->ptys[1].master = bl->ptys[1].slave = 0;
     libxl__ev_child_init(&bl->child);
+    libxl__domaindeathcheck_init(&bl->deathcheck);
     bl->keystrokes.ao = bl->ao;  libxl__datacopier_init(&bl->keystrokes);
     bl->display.ao = bl->ao;     libxl__datacopier_init(&bl->display);
 }
@@ -230,6 +232,7 @@ static void bootloader_cleanup(libxl__egc *egc, libxl__bootloader_state *bl)
         free(bl->diskpath);
         bl->diskpath = 0;
     }
+    libxl__domaindeathcheck_stop(gc,&bl->deathcheck);
     libxl__datacopier_kill(&bl->keystrokes);
     libxl__datacopier_kill(&bl->display);
     for (i=0; i<2; i++) {
@@ -256,6 +259,23 @@ static void bootloader_callback(libxl__egc *egc, libxl__bootloader_state *bl,
     bl->callback(egc, bl, rc);
 }
 
+/* might be called at any time, provided it's init'd */
+static void bootloader_abort(libxl__egc *egc,
+                             libxl__bootloader_state *bl, int rc)
+{
+    STATE_AO_GC(bl->ao);
+    int r;
+
+    libxl__datacopier_kill(&bl->keystrokes);
+    libxl__datacopier_kill(&bl->display);
+    if (libxl__ev_child_inuse(&bl->child)) {
+        r = kill(bl->child.pid, SIGTERM);
+        if (r) LOGE(WARN, "after failure, failed to kill bootloader [%lu]",
+                    (unsigned long)bl->child.pid);
+    }
+    bl->rc = rc;
+}
+
 /*----- main flow of control -----*/
 
 void libxl__bootloader_run(libxl__egc *egc, libxl__bootloader_state *bl)
@@ -377,6 +397,12 @@ static void bootloader_gotptys(libxl__egc *egc, libxl__openpty_state *op)
         goto out;
     }
 
+    bl->deathcheck.what = "stopping bootloader";
+    bl->deathcheck.domid = bl->domid;
+    bl->deathcheck.callback = bootloader_domaindeath;
+    rc = libxl__domaindeathcheck_start(gc, &bl->deathcheck);
+    if (rc) goto out;
+
     if (bl->console_available)
         bl->console_available(egc, bl);
 
@@ -454,18 +480,9 @@ static void bootloader_copyfail(libxl__egc *egc, const char *which,
        libxl__bootloader_state *bl, int onwrite, int errnoval)
 {
     STATE_AO_GC(bl->ao);
-    int r;
-
     if (!onwrite && !errnoval)
         LOG(ERROR, "unexpected eof copying %s", which);
-    libxl__datacopier_kill(&bl->keystrokes);
-    libxl__datacopier_kill(&bl->display);
-    if (libxl__ev_child_inuse(&bl->child)) {
-        r = kill(bl->child.pid, SIGTERM);
-        if (r) LOGE(WARN, "after failure, failed to kill bootloader [%lu]",
-                    (unsigned long)bl->child.pid);
-    }
-    bl->rc = ERROR_FAIL;
+    bootloader_abort(egc, bl, ERROR_FAIL);
 }
 static void bootloader_keystrokes_copyfail(libxl__egc *egc,
        libxl__datacopier_state *dc, int onwrite, int errnoval)
@@ -480,6 +497,12 @@ static void bootloader_display_copyfail(libxl__egc *egc,
     bootloader_copyfail(egc, "bootloader output", bl, onwrite, errnoval);
 }
 
+static void bootloader_domaindeath(libxl__egc *egc, libxl__domaindeathcheck *dc)
+{
+    libxl__bootloader_state *bl = CONTAINER_OF(dc, *bl, deathcheck);
+    bootloader_abort(egc, bl, ERROR_FAIL);
+}
+
 static void bootloader_finished(libxl__egc *egc, libxl__ev_child *child,
                                 pid_t pid, int status)
 {
diff --git a/tools/libxl/libxl_event.c b/tools/libxl/libxl_event.c
index 63719b2..03d0498 100644
--- a/tools/libxl/libxl_event.c
+++ b/tools/libxl/libxl_event.c
@@ -589,6 +589,53 @@ int libxl__ev_devstate_wait(libxl__gc *gc, libxl__ev_devstate *ds,
 }
 
 /*
+ * domain death/destruction
+ */
+
+/*
+ * We use a xenstore watch on the domain's path, rather than using an
+ * @releaseDomain watch and asking the hypervisor.  This is simpler
+ * because turning @releaseDomain into domain-specific information is
+ * complicated.
+ *
+ * It is also sufficient for our callers, which are generally trying
+ * to do cleanup of their own execution state on domain death, for the
+ * following reason: if the domain is destroyed then either (a) the
+ * entries in xenstore have already been deleted, in which case the
+ * test here works or (b) they have not in which case something has
+ * gone very badly wrong and we are going to leak those xenstore
+ * entries, in which case trying to avoid leaking other stuff is
+ * futile.
+ */
+
+static void domaindeathcheck_callback(libxl__egc *egc, libxl__ev_xswatch *w,
+                            const char *watch_path, const char *event_path)
+{
+    libxl__domaindeathcheck *dc = CONTAINER_OF(w, *dc, watch);
+    EGC_GC;
+    const char *p = libxl__xs_read(gc, XBT_NULL, watch_path);
+    if (p) return;
+
+    if (errno!=ENOENT) {
+        LIBXL__EVENT_DISASTER(egc,"failed to read xenstore"
+                              " for domain detach check", errno, 0);
+        return;
+    }
+
+    LOG(ERROR,"%s: domain %"PRIu32" removed (%s no longer in xenstore)",
+        dc->what, dc->domid, watch_path);
+    dc->callback(egc, dc);
+}
+
+int libxl__domaindeathcheck_start(libxl__gc *gc,
+                                  libxl__domaindeathcheck *dc)
+{
+    const char *path = GCSPRINTF("/local/domain/%"PRIu32, dc->domid);
+    return libxl__ev_xswatch_register(gc, &dc->watch,
+                                      domaindeathcheck_callback, path);
+}
+
+/*
  * osevent poll
  */
 
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 83fabea..f71e76a 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -866,6 +866,33 @@ _hidden int libxl__ev_devstate_wait(libxl__gc *gc, libxl__ev_devstate *ds,
                                     const char *state_path,
                                     int state, int milliseconds);
 
+/*
+ * libxl__ev_domaindeathcheck_register - arranges to call back (once)
+ * if the domain is destroyed.  If the domain dies, we log a message
+ * of the form "<what>: <explanation of the situation, including the domid>".
+ */
+
+typedef struct libxl__domaindeathcheck libxl__domaindeathcheck;
+typedef void libxl___domaindeathcheck_callback(libxl__egc *egc,
+                                         libxl__domaindeathcheck*);
+
+struct libxl__domaindeathcheck {
+    /* must be filled in by caller, and remain valid: */
+    const char *what;
+    uint32_t domid;
+    libxl___domaindeathcheck_callback *callback;
+    /* private */
+    libxl__ev_xswatch watch;
+};
+
+_hidden int libxl__domaindeathcheck_start(libxl__gc *gc,
+                                          libxl__domaindeathcheck *dc);
+
+static inline void libxl__domaindeathcheck_init
+ (libxl__domaindeathcheck *dc) { libxl__ev_xswatch_init(&dc->watch); }
+static inline void libxl__domaindeathcheck_stop(libxl__gc *gc,
+  libxl__domaindeathcheck *dc) { libxl__ev_xswatch_deregister(gc,&dc->watch); }
+
 
 /*
  * libxl__try_phy_backend - Check if there's support for the passed
@@ -1738,6 +1765,7 @@ struct libxl__bootloader_state {
     libxl__openpty_state openpty;
     libxl__openpty_result ptys[2];  /* [0] is for bootloader */
     libxl__ev_child child;
+    libxl__domaindeathcheck deathcheck;
     int nargs, argsspace;
     const char **args;
     libxl__datacopier_state keystrokes, display;
@@ -1750,7 +1778,6 @@ _hidden void libxl__bootloader_init(libxl__bootloader_state *bl);
  * If callback is passed rc==0, will have updated st->info appropriately */
 _hidden void libxl__bootloader_run(libxl__egc*, libxl__bootloader_state *st);
 
-
 /*----- Domain creation -----*/
 
 typedef struct libxl__domain_create_state libxl__domain_create_state;
-- 
1.7.2.5

      parent reply	other threads:[~2012-05-11 17:58 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-05-11 17:57 [PATCH v9 00/27] libxl: child process handling Ian Jackson
2012-05-11 17:57 ` [PATCH 01/27] libxl: handle POLLERR, POLLHUP, POLLNVAL properly Ian Jackson
2012-05-11 17:57 ` [PATCH 02/27] libxl: support multiple libxl__ev_fds for the same fd Ian Jackson
2012-05-11 17:57 ` [PATCH 03/27] libxl: event API: new facilities for waiting for subprocesses Ian Jackson
2012-05-11 17:57 ` [PATCH 04/27] autoconf: trim the configure script; use autoheader Ian Jackson
2012-05-11 17:57 ` [PATCH 05/27] autoconf: New test for openpty et al Ian Jackson
2012-05-11 17:57 ` [PATCH 06/27] libxl: provide libxl__remove_file " Ian Jackson
2012-05-11 17:57 ` [PATCH 07/27] libxl: Introduce libxl__sendmsg_fds and libxl__recvmsg_fds Ian Jackson
2012-05-11 17:57 ` [PATCH 08/27] libxl: Clean up setdefault in do_domain_create Ian Jackson
2012-05-11 17:57 ` [PATCH 09/27] libxl: provide libxl__datacopier_* Ian Jackson
2012-05-11 17:57 ` [PATCH 10/27] libxl: provide libxl__openpty_* Ian Jackson
2012-05-11 17:57 ` [PATCH 11/27] libxl: ao: Convert libxl_run_bootloader Ian Jackson
2012-05-11 17:57 ` [PATCH 12/27] libxl: make libxl_create_logfile const-correct Ian Jackson
2012-05-11 17:57 ` [PATCH 13/27] libxl: log bootloader output Ian Jackson
2012-05-11 17:57 ` [PATCH 14/27] libxl: Allow AO_GC and EGC_GC even if not used Ian Jackson
2012-05-11 17:58 ` [PATCH 15/27] libxl: add a dummy ao_how to libxl_domain_core_dump Ian Jackson
2012-05-11 17:58 ` [PATCH 16/27] libxl: remove ctx->waitpid_instead Ian Jackson
2012-05-11 17:58 ` [PATCH 17/27] libxl: change some structures to unit arrays Ian Jackson
2012-05-11 17:58 ` [PATCH 18/27] libxl: ao: convert libxl__spawn_* Ian Jackson
2012-05-11 17:58 ` [PATCH 19/27] libxl: set guest_domid even if libxl__domain_make fails Ian Jackson
2012-05-11 17:58 ` [PATCH 20/27] libxl: make libxl_create run bootloader via callback Ian Jackson
2012-05-11 17:58 ` [PATCH 21/27] libxl: Fix an ao completion bug; document locking policy Ian Jackson
2012-05-11 17:58 ` [PATCH 22/27] libxl: provide progress reporting for long-running operations Ian Jackson
2012-05-11 17:58 ` [PATCH 23/27] libxl: remove malloc failure handling from NEW_EVENT Ian Jackson
2012-05-11 17:58 ` [PATCH 24/27] libxl: convert console callback to libxl_asyncprogress_how Ian Jackson
2012-05-11 17:58 ` [PATCH 25/27] libxl: clarify definition of "slow" operation Ian Jackson
2012-05-11 17:58 ` [PATCH 26/27] libxl: child processes cleanups Ian Jackson
2012-05-11 17:58 ` Ian Jackson [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1336759092-2432-28-git-send-email-ian.jackson@eu.citrix.com \
    --to=ian.jackson@eu.citrix.com \
    --cc=xen-devel@lists.xen.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).