xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
From: Shriram Rajagopalan <rshriram@cs.ubc.ca>
To: xen-devel@lists.xen.org
Cc: Andrew Cooper <andrew.cooper3@citrix.com>,
	Ian Jackson <ian.jackson@eu.citrix.com>,
	Ian Campbell <ian.campbell@citrix.com>,
	Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Subject: [PATCH 5 of 7 V4] tools/libxl: Control network buffering in remus	callbacks
Date: Thu, 14 Nov 2013 21:47:50 -0800	[thread overview]
Message-ID: <8e5366a6cd958c00a9b7.1384494470@athos.nss.cs.ubc.ca> (raw)
In-Reply-To: <patchbomb.1384494465@athos.nss.cs.ubc.ca>

# HG changeset patch
# User Shriram Rajagopalan <rshriram@cs.ubc.ca>
# Date 1384492677 28800
# Node ID 8e5366a6cd958c00a9b7a726c149a552a02a7af6
# Parent  d3d7da1c2289749b2a1c5b8baaf9d9c18a914e9b
tools/libxl: Control network buffering in remus callbacks

This patch constitutes the core network buffering logic
and does the following:
 a) create a new network buffer when the domain is suspended
    (libxl__remus_domain_suspend_callback)
 b) release the previous network buffer pertaining to the
    committed checkpoint (remus_checkpoint_dm_saved)

Signed-off-by: Shriram Rajagopalan <rshriram@cs.ubc.ca>

diff -r d3d7da1c2289 -r 8e5366a6cd95 tools/libxl/libxl_dom.c
--- a/tools/libxl/libxl_dom.c	Thu Nov 14 21:17:56 2013 -0800
+++ b/tools/libxl/libxl_dom.c	Thu Nov 14 21:17:57 2013 -0800
@@ -1285,9 +1285,23 @@ static void libxl__remus_domain_suspend_
     libxl__save_helper_state *shs = data;
     libxl__egc *egc = shs->egc;
     libxl__domain_suspend_state *dss = CONTAINER_OF(shs, *dss, shs);
+    libxl__remus_ctx *remus_ctx = dss->remus_ctx;
+    STATE_AO_GC(dss->ao);
 
-    /* REMUS TODO: Issue disk and network checkpoint reqs. */
+    /* REMUS TODO: Issue disk checkpoint reqs. */
     int ok = libxl__domain_suspend_callback_common(dss);
+
+    if (!remus_ctx->netbuf_ctx || !ok) goto out;
+
+    /* The domain was suspended successfully. Start a new network
+     * buffer for the next epoch. If this operation fails, then act
+     * as though domain suspend failed -- libxc exits its infinite
+     * loop and ultimately, the replication stops.
+     */
+    if (libxl__remus_netbuf_start_new_epoch(gc, dss->domid,
+                                            remus_ctx))
+        ok = 0;
+ out:
     libxl__xc_domain_saverestore_async_callback_done(egc, shs, ok);
 }
 
@@ -1301,7 +1315,7 @@ static int libxl__remus_domain_resume_ca
     if (libxl__domain_resume(gc, dss->domid, /* Fast Suspend */1))
         return 0;
 
-    /* REMUS TODO: Deal with disk. Start a new network output buffer */
+    /* REMUS TODO: Deal with disk. */
     return 1;
 }
 
@@ -1310,6 +1324,9 @@ static int libxl__remus_domain_resume_ca
 static void remus_checkpoint_dm_saved(libxl__egc *egc,
                                       libxl__domain_suspend_state *dss, int rc);
 
+static void remus_next_checkpoint(libxl__egc *egc, libxl__ev_time *ev,
+                                  const struct timeval *requested_abs);
+
 static void libxl__remus_domain_checkpoint_callback(void *data)
 {
     libxl__save_helper_state *shs = data;
@@ -1328,10 +1345,51 @@ static void libxl__remus_domain_checkpoi
 static void remus_checkpoint_dm_saved(libxl__egc *egc,
                                       libxl__domain_suspend_state *dss, int rc)
 {
-    /* REMUS TODO: Wait for disk and memory ack, release network buffer */
-    /* REMUS TODO: make this asynchronous */
-    assert(!rc); /* REMUS TODO handle this error properly */
-    usleep(dss->remus_ctx->interval * 1000);
+    /*
+     * REMUS TODO: Wait for disk and explicit memory ack (through restore
+     * callback from remote) before releasing network buffer.
+     */
+    libxl__remus_ctx *remus_ctx = dss->remus_ctx;
+    STATE_AO_GC(dss->ao);
+
+    if (rc) {
+        LOG(ERROR, "Failed to save device model. Terminating Remus..");
+        goto out;
+    }
+
+    if (remus_ctx->netbuf_ctx) {
+        rc = libxl__remus_netbuf_release_prev_epoch(gc, dss->domid,
+                                                     remus_ctx);
+        if (rc) {
+            LOG(ERROR, "Failed to release network buffer."
+                " Terminating Remus..");
+            goto out;
+        }
+    }
+
+    /* Set checkpoint interval timeout */
+    rc = libxl__ev_time_register_rel(gc, &remus_ctx->timeout,
+                                     remus_next_checkpoint,
+                                     dss->remus_ctx->interval);
+    if (rc) {
+        LOG(ERROR, "unable to register timeout for next epoch."
+            " Terminating Remus..");
+        goto out;
+    }
+    return;
+
+ out:
+    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, 0);
+}
+
+static void remus_next_checkpoint(libxl__egc *egc, libxl__ev_time *ev,
+                                  const struct timeval *requested_abs)
+{
+    libxl__remus_ctx *remus_ctx = CONTAINER_OF(ev, *remus_ctx, timeout);
+    libxl__domain_suspend_state *dss = remus_ctx->dss;
+    STATE_AO_GC(dss->ao);
+
+    libxl__ev_time_deregister(gc, &remus_ctx->timeout);
     libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, 1);
 }

  parent reply	other threads:[~2013-11-15  5:47 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-11-15  5:47 [PATCH 0 of 7 V4] Remus/Libxl: Network buffering support Shriram Rajagopalan
2013-11-15  5:47 ` [PATCH 1 of 7 V4] [PATCH] libxl: make libxl__domain_suspend_callback be asynchronous Shriram Rajagopalan
2013-11-15  5:47 ` [PATCH 2 of 7 V4] remus: add libnl3 dependency to autoconf scripts Shriram Rajagopalan
2013-11-18 16:31   ` Ian Jackson
2013-11-15  5:47 ` [PATCH 3 of 7 V4] tools/hotplug: Remus network buffering setup scripts Shriram Rajagopalan
2013-11-18 16:34   ` Ian Jackson
2013-11-15  5:47 ` [PATCH 4 of 7 V4] tools/libxl: setup/teardown Remus network buffering Shriram Rajagopalan
2013-11-15  5:47 ` Shriram Rajagopalan [this message]
2013-11-15  5:47 ` [PATCH 6 of 7 V4] tools/xl: Remus - Network buffering cmdline switch Shriram Rajagopalan
2013-11-15  5:47 ` [PATCH 7 of 7 V4] tools/libxl: refactor domain_suspend_callback code to be fully asynchronous Shriram Rajagopalan
2013-11-15 18:27   ` Shriram Rajagopalan
2013-11-18 17:36   ` Shriram Rajagopalan
2013-11-18 17:45     ` Ian Jackson
2013-11-18 17:55       ` Shriram Rajagopalan
2013-11-18 17:49   ` Ian Jackson
2013-11-18 17:52     ` Shriram Rajagopalan
2013-11-18 13:13 ` [PATCH 0 of 7 V4] Remus/Libxl: Network buffering support Shriram Rajagopalan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=8e5366a6cd958c00a9b7.1384494470@athos.nss.cs.ubc.ca \
    --to=rshriram@cs.ubc.ca \
    --cc=andrew.cooper3@citrix.com \
    --cc=ian.campbell@citrix.com \
    --cc=ian.jackson@eu.citrix.com \
    --cc=stefano.stabellini@eu.citrix.com \
    --cc=xen-devel@lists.xen.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).