From mboxrd@z Thu Jan 1 00:00:00 1970 From: Shriram Rajagopalan Subject: [PATCH 4 of 5 V3] tools/libxl: Control network buffering in remus callbacks Date: Sun, 20 Oct 2013 22:58:06 -0700 Message-ID: References: Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: xen-devel@lists.xen.org Cc: Andrew Cooper , Ian Jackson , Ian Campbell , Stefano Stabellini List-Id: xen-devel@lists.xenproject.org # HG changeset patch # User Shriram Rajagopalan # Date 1382295546 25200 # Node ID a8deb9499e9dcce9869025fa1c02cf2e0d58612a # Parent d3f088236c550213fc04ed982df47b4771b28d2f tools/libxl: Control network buffering in remus callbacks This patch constitutes the core network buffering logic and does the following: a) create a new network buffer when the domain is suspended (remus_domain_suspend_callback) b) release the previous network buffer pertaining to the committed checkpoint (remus_checkpoint_dm_saved) Signed-off-by: Shriram Rajagopalan diff -r d3f088236c55 -r a8deb9499e9d tools/libxl/libxl_dom.c --- a/tools/libxl/libxl_dom.c Sun Oct 20 11:54:26 2013 -0700 +++ b/tools/libxl/libxl_dom.c Sun Oct 20 11:59:06 2013 -0700 @@ -1259,8 +1259,24 @@ void libxl__remus_teardown_done(libxl__e static int libxl__remus_domain_suspend_callback(void *data) { - /* REMUS TODO: Issue disk and network checkpoint reqs. */ - return libxl__domain_suspend_common_callback(data); + /* REMUS TODO: Issue disk checkpoint reqs. 
*/ + libxl__save_helper_state *shs = data; + libxl__domain_suspend_state *dss = CONTAINER_OF(shs, *dss, shs); + libxl__remus_ctx *remus_ctx = dss->remus_ctx; + bool is_suspended; + STATE_AO_GC(dss->ao); + + is_suspended = !!libxl__domain_suspend_common_callback(data); + + if (!remus_ctx->netbuf_ctx) return is_suspended; + + if (is_suspended) { + if (libxl__remus_netbuf_start_new_epoch(gc, dss->domid, + remus_ctx)) + return !is_suspended; + } + + return is_suspended; } static int libxl__remus_domain_resume_callback(void *data) @@ -1273,7 +1289,7 @@ static int libxl__remus_domain_resume_ca if (libxl__domain_resume(gc, dss->domid, /* Fast Suspend */1)) return 0; - /* REMUS TODO: Deal with disk. Start a new network output buffer */ + /* REMUS TODO: Deal with disk. */ return 1; } @@ -1300,11 +1316,42 @@ static void libxl__remus_domain_checkpoi static void remus_checkpoint_dm_saved(libxl__egc *egc, libxl__domain_suspend_state *dss, int rc) { - /* REMUS TODO: Wait for disk and memory ack, release network buffer */ - /* REMUS TODO: make this asynchronous */ - assert(!rc); /* REMUS TODO handle this error properly */ - usleep(dss->remus_ctx->interval * 1000); - libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, 1); + /* + * REMUS TODO: Wait for disk and explicit memory ack (through restore + * callback from remote) before releasing network buffer. + */ + libxl__remus_ctx *remus_ctx = dss->remus_ctx; + struct timespec epoch; + int do_next_iter = 0; + STATE_AO_GC(dss->ao); + + if (rc) { + LOG(ERROR, "Failed to save device model. Terminating Remus.."); + goto out; + } + + if (remus_ctx->netbuf_ctx) { + rc = libxl__remus_netbuf_release_prev_epoch(gc, dss->domid, + remus_ctx); + if (rc) { + LOG(ERROR, "Failed to release network buffer." 
+ " Terminating Remus.."); + goto out; + } + } + + epoch.tv_sec = remus_ctx->interval / 1000; /* interval is in ms */ + epoch.tv_nsec = (remus_ctx->interval % 1000) * 1000L * 1000L; + nanosleep(&epoch, 0); + /* + * Set return value to 1, so that the infinite checkpoint cycle + * continues. See xc_domain_save.c: xc_domain_save() + */ + do_next_iter = 1; + + out: + libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, + do_next_iter); } /*----- main code for suspending, in order of execution -----*/