xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
From: Wen Congyang <wency@cn.fujitsu.com>
To: xen devel <xen-devel@lists.xen.org>
Cc: Ian Campbell <Ian.Campbell@citrix.com>,
	Wen Congyang <wency@cn.fujitsu.com>,
	Ian Jackson <Ian.Jackson@eu.citrix.com>,
	Jiang Yunhong <yunhong.jiang@intel.com>,
	Dong Eddie <eddie.dong@intel.com>,
	Yang Hongyang <yanghy@cn.fujitsu.com>,
	Lai Jiangshan <laijs@cn.fujitsu.com>
Subject: [RFC Patch v2 44/45] libxl/colo: setup and control disk replication for blktap2 backends
Date: Fri, 8 Aug 2014 15:01:43 +0800	[thread overview]
Message-ID: <1407481305-19808-45-git-send-email-wency@cn.fujitsu.com> (raw)
In-Reply-To: <1407481305-19808-1-git-send-email-wency@cn.fujitsu.com>

This patch adds the machinery required for protecting a guest's
disk state, when the guest disk uses a blktap2 disk backend.
1. COLO blktap2 disk device: Implements the interfaces required by the
   checkpoint abstract device layer. A note about the implementation:
   a) setup() is called for each disk attached to the guest.
      During setup():
      i) perform the sanity check: backend type should be LIBXL_DISK_BACKEND_TAP
         and format should be LIBXL_DISK_FORMAT_COLO.
      ii) connect to the control socket: /var/run/tap/colo_xxx, where xxx is
          "host:port" (the characters ':' and '/' are replaced with '_').
   b) The postsuspend() callback writes "flush" to this socket.
   c) The commit() callback waits for and reads "done" from this socket.

Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
---
 tools/libxl/Makefile                       |   2 +-
 tools/libxl/libxl_colo_save.c              |   5 +-
 tools/libxl/libxl_colo_save_disk_blktap2.c | 216 +++++++++++++++++++++++++++++
 tools/libxl/libxl_create.c                 |   7 +
 tools/libxl/libxl_noblktap2.c              |  29 ++++
 5 files changed, 257 insertions(+), 2 deletions(-)
 create mode 100644 tools/libxl/libxl_colo_save_disk_blktap2.c

diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile
index 1c32ae2..b4755c8 100644
--- a/tools/libxl/Makefile
+++ b/tools/libxl/Makefile
@@ -45,7 +45,7 @@ LIBXLU_LIBS =
 
 LIBXL_OBJS-y = osdeps.o libxl_paths.o libxl_bootloader.o flexarray.o
 ifeq ($(LIBXL_BLKTAP),y)
-LIBXL_OBJS-y += libxl_blktap2.o
+LIBXL_OBJS-y += libxl_blktap2.o libxl_colo_save_disk_blktap2.o
 else
 LIBXL_OBJS-y += libxl_noblktap2.o
 endif
diff --git a/tools/libxl/libxl_colo_save.c b/tools/libxl/libxl_colo_save.c
index 75d83c8..b7070b2 100644
--- a/tools/libxl/libxl_colo_save.c
+++ b/tools/libxl/libxl_colo_save.c
@@ -18,7 +18,10 @@
 #include "libxl_internal.h"
 #include "libxl_colo.h"
 
+extern const libxl__checkpoint_device_subkind_ops colo_save_device_blktap2_disk;
+
 static const libxl__checkpoint_device_subkind_ops *colo_ops[] = {
+    &colo_save_device_blktap2_disk,
     NULL,
 };
 
@@ -49,7 +52,7 @@ void libxl__colo_save_setup(libxl__egc *egc, libxl__colo_save_state *css)
     css->svm_running = false;
 
     /* TODO: disk/nic support */
-    cds->device_kind_flags = 0;
+    cds->device_kind_flags = LIBXL__CHECKPOINT_DEVICE_DISK;
     cds->ops = colo_ops;
     cds->callback = colo_save_setup_done;
     cds->ao = ao;
diff --git a/tools/libxl/libxl_colo_save_disk_blktap2.c b/tools/libxl/libxl_colo_save_disk_blktap2.c
new file mode 100644
index 0000000..1c35971
--- /dev/null
+++ b/tools/libxl/libxl_colo_save_disk_blktap2.c
@@ -0,0 +1,216 @@
+/*
+ * Copyright (C) 2014 FUJITSU LIMITED
+ * Author: Wen Congyang <wency@cn.fujitsu.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; version 2.1 only. with the special
+ * exception on linking described in file LICENSE.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ */
+
+#include "libxl_osdeps.h" /* must come before any other headers */
+
+#include "libxl_internal.h"
+
+#include <string.h>
+#include <sys/un.h>
+
+#define     BLKTAP2_REQUEST     "flush"
+#define     BLKTAP2_RESPONSE    "done"
+#define     BLKTAP_CTRL_DIR     "/var/run/tap"
+
+/*
+ * Private per-disk state of the COLO blktap2 disk subkind.  An instance is
+ * allocated by blktap2_colo_setup() and stored in dev->concrete_data.
+ */
+typedef struct libxl__colo_blktap2_disk {
+    /* the part of the disk's pdev_path that precedes the first '|' */
+    char *name;
+    /*
+     * BLKTAP_CTRL_DIR "/colo_<name>", with every ':' and '/' after the
+     * directory prefix replaced by '_'
+     */
+    char *ctl_socket_path;
+    /* control socket connected to ctl_socket_path; -1 while not connected */
+    int fd;
+    /* fd event registered by commit to wait for the response on fd */
+    libxl__ev_fd ev;
+    /* back-pointer to the owning checkpoint device (used by the ev callback) */
+    libxl__checkpoint_device *dev;
+}libxl__colo_blktap2_disk;
+
+/* ========== init() and cleanup() ========== */
+/*
+ * Subkind-wide init hook (the .init member of the ops table below).
+ * This subkind keeps no global state, so there is nothing to do; always
+ * returns 0.
+ */
+static int blktap2_colo_init(libxl__checkpoint_devices_state *cds)
+{
+    return 0;
+}
+
+/*
+ * Subkind-wide cleanup hook (the .cleanup member of the ops table below).
+ * Intentionally empty: blktap2_colo_init() acquires nothing.
+ */
+static void blktap2_colo_cleanup(libxl__checkpoint_devices_state *cds)
+{
+}
+
+/* ========== setup() and teardown() ========== */
+/*
+ * Connect to the blktap2 COLO control socket of one disk.
+ *
+ * Creates an AF_UNIX stream socket and connects it to
+ * blktap2_disk->ctl_socket_path.
+ *
+ * Returns 0 on success, with the connected descriptor stored in
+ * blktap2_disk->fd.  Returns ERROR_FAIL on failure; in that case
+ * blktap2_disk->fd is left untouched and no descriptor is leaked.
+ */
+static int blktap2_control_connect(libxl__gc *gc,
+                                   libxl__colo_blktap2_disk *blktap2_disk)
+{
+    struct sockaddr_un saddr;
+    const char *path = blktap2_disk->ctl_socket_path;
+    int fd, err;
+
+    /*
+     * sun_path is a fixed-size array and the path is derived from the disk
+     * configuration, so reject anything that does not fit (including the
+     * terminating NUL) rather than letting strcpy() overflow the stack.
+     */
+    if (strlen(path) >= sizeof(saddr.sun_path)) {
+        LOG(ERROR, "control socket path %s is too long", path);
+        return ERROR_FAIL;
+    }
+
+    fd = socket(AF_UNIX, SOCK_STREAM, 0);
+    if (fd < 0) {
+        LOG(ERROR, "cannot creating socket fd");
+        return ERROR_FAIL;
+    }
+
+    memset(&saddr, 0, sizeof(saddr));
+    saddr.sun_family = AF_UNIX;
+    strcpy(saddr.sun_path, path); /* length verified above */
+
+    err = connect(fd, (const struct sockaddr *)&saddr, sizeof(saddr));
+    if (err) {
+        LOG(ERROR, "cannot connecte to %s", path);
+        close(fd);
+        return ERROR_FAIL;
+    }
+
+    blktap2_disk->fd = fd;
+    return 0;
+}
+
+/*
+ * Per-device setup hook (the .setup member of the ops table below).
+ *
+ * Declines the device with ERROR_CHECKPOINT_DEVOPS_DOES_NOT_MATCH unless it
+ * is a tap-backed disk in COLO format.  Otherwise it marks the device as set
+ * up, allocates the private state, derives the control socket path from the
+ * part of pdev_path before the first '|' and connects to that socket.
+ *
+ * The result is always reported through dev->aodev.rc followed by a call to
+ * dev->aodev.callback().
+ */
+static void blktap2_colo_setup(libxl__checkpoint_device *dev)
+{
+    const libxl_device_disk *disk = dev->backend_dev;
+    libxl__colo_blktap2_disk *priv;
+    const char *sep;
+    char *p;
+    int rc;
+
+    STATE_AO_GC(dev->cds->ao);
+
+    /* This subkind only handles tap disks that use the COLO format. */
+    if (!(disk->backend == LIBXL_DISK_BACKEND_TAP &&
+          disk->format == LIBXL_DISK_FORMAT_COLO)) {
+        rc = ERROR_CHECKPOINT_DEVOPS_DOES_NOT_MATCH;
+        goto out;
+    }
+
+    dev->set_up = 1;
+    GCNEW(priv);
+    priv->fd = -1;
+    priv->dev = dev;
+    dev->concrete_data = priv;
+
+    /* Everything before the first '|' in pdev_path names the disk. */
+    sep = strchr(disk->pdev_path, '|');
+    if (sep == NULL) {
+        LOG(ERROR, "unexpected pdev_path: %s", disk->pdev_path);
+        rc = ERROR_FAIL;
+        goto out;
+    }
+
+    priv->name = libxl__strndup(gc, disk->pdev_path,
+                                sep - disk->pdev_path);
+    priv->ctl_socket_path = libxl__sprintf(gc, "%s/colo_%s",
+                                           BLKTAP_CTRL_DIR, priv->name);
+
+    /*
+     * Scrub the file-name part of the socket path: skip the directory and
+     * the '/' that follows it, then turn every ':' and '/' into '_'.
+     */
+    for (p = priv->ctl_socket_path + strlen(BLKTAP_CTRL_DIR) + 1; *p; p++) {
+        if (*p == ':' || *p == '/')
+            *p = '_';
+    }
+
+    libxl__ev_fd_init(&priv->ev);
+
+    rc = blktap2_control_connect(gc, priv);
+
+out:
+    dev->aodev.rc = rc;
+    dev->aodev.callback(dev->cds->egc, &dev->aodev);
+}
+
+/*
+ * Per-device teardown hook (the .teardown member of the ops table below).
+ *
+ * Closes the control socket if one is open and then reports success through
+ * dev->aodev.rc / dev->aodev.callback().
+ */
+static void blktap2_colo_teardown(libxl__checkpoint_device *dev)
+{
+    libxl__colo_blktap2_disk *blktap2_disk = dev->concrete_data;
+
+    /*
+     * -1 is the "not connected" marker set by setup; 0 is a perfectly valid
+     * descriptor, so the test must be >= 0 or such a socket would leak.
+     */
+    if (blktap2_disk->fd >= 0) {
+        close(blktap2_disk->fd);
+        blktap2_disk->fd = -1;
+    }
+
+    dev->aodev.rc = 0;
+    dev->aodev.callback(dev->cds->egc, &dev->aodev);
+}
+
+/* ========== checkpointing APIs ========== */
+static void blktap2_control_readable(libxl__egc *egc, libxl__ev_fd *ev,
+                                     int fd, short events, short revents);
+
+/*
+ * Post-suspend hook (the .postsuspend member of the ops table below).
+ *
+ * Writes BLKTAP2_REQUEST to the disk's control socket.  The outcome is
+ * reported through dev->aodev.rc / dev->aodev.callback(): 0 when the whole
+ * request was written, ERROR_FAIL on a write error or a short write.
+ */
+static void blktap2_colo_postsuspend(libxl__checkpoint_device *dev)
+{
+    libxl__colo_blktap2_disk *blktap2_disk = dev->concrete_data;
+    const size_t len = strlen(BLKTAP2_REQUEST);
+    ssize_t ret;
+    int rc = 0;
+
+    /* unix socket fd, so this small write is not expected to block */
+    ret = write(blktap2_disk->fd, BLKTAP2_REQUEST, len);
+
+    /*
+     * Test for the error return explicitly: comparing a signed -1 directly
+     * against a size_t would convert it to a huge unsigned value and let a
+     * failed write pass as success.
+     */
+    if (ret < 0 || (size_t)ret != len)
+        rc = ERROR_FAIL;
+
+    dev->aodev.rc = rc;
+    dev->aodev.callback(dev->cds->egc, &dev->aodev);
+}
+
+/*
+ * Commit hook (the .commit member of the ops table below).
+ *
+ * Registers blktap2_control_readable() for POLLIN on the control socket.
+ * When registration succeeds, nothing is reported here; the registered
+ * handler invokes dev->aodev.callback() once it has run.  When registration
+ * fails, the error is reported immediately.
+ */
+static void blktap2_colo_commit(libxl__checkpoint_device *dev)
+{
+    libxl__colo_blktap2_disk *priv = dev->concrete_data;
+    int rc;
+
+    STATE_AO_GC(dev->cds->ao);
+
+    rc = libxl__ev_fd_register(gc, &priv->ev, blktap2_control_readable,
+                               priv->fd, POLLIN);
+    if (!rc)
+        return; /* completion comes from blktap2_control_readable() */
+
+    dev->aodev.rc = rc;
+    dev->aodev.callback(dev->cds->egc, &dev->aodev);
+}
+
+/*
+ * fd-event handler registered by blktap2_colo_commit().
+ *
+ * Deregisters the event, reads the reply from the control socket and
+ * compares it with BLKTAP2_RESPONSE.  The outcome is reported through
+ * dev->aodev.rc / dev->aodev.callback(): 0 when the expected reply was
+ * read, ERROR_FAIL on an unexpected poll event, a read error, a short read
+ * or a reply that does not match.
+ */
+static void blktap2_control_readable(libxl__egc *egc, libxl__ev_fd *ev,
+                                     int fd, short events, short revents)
+{
+    libxl__colo_blktap2_disk *blktap2_disk =
+                CONTAINER_OF(ev, *blktap2_disk, ev);
+    int rc = 0;
+    ssize_t ret;
+    /* sized from the expected reply instead of a magic number */
+    char response[sizeof(BLKTAP2_RESPONSE)];
+    const size_t expected = sizeof(response) - 1;
+
+    /* Convenience aliases */
+    libxl__checkpoint_device *const dev = blktap2_disk->dev;
+
+    EGC_GC;
+
+    libxl__ev_fd_deregister(gc, ev);
+
+    if (revents & ~POLLIN) {
+        LOG(ERROR, "unexpected poll event 0x%x (should be POLLIN)", revents);
+        rc = ERROR_FAIL;
+        goto out;
+    }
+
+    ret = read(blktap2_disk->fd, response, expected);
+    /*
+     * Check for -1 before comparing with the unsigned length; otherwise a
+     * failed read converts to a huge unsigned value, slips past the short
+     * read test and the uninitialised buffer gets compared below.
+     */
+    if (ret < 0 || (size_t)ret < expected) {
+        rc = ERROR_FAIL;
+        goto out;
+    }
+
+    if (memcmp(response, BLKTAP2_RESPONSE, expected))
+        rc = ERROR_FAIL;
+
+out:
+    dev->aodev.rc = rc;
+    dev->aodev.callback(dev->cds->egc, &dev->aodev);
+}
+
+/*
+ * Checkpoint-device operations of the COLO blktap2 disk subkind.  The object
+ * has external linkage; libxl_colo_save.c refers to it through an extern
+ * declaration.
+ */
+const libxl__checkpoint_device_subkind_ops colo_save_device_blktap2_disk = {
+    .kind = LIBXL__CHECKPOINT_DEVICE_DISK,
+    /* subkind-wide hooks (both are no-ops) */
+    .init = blktap2_colo_init,
+    .cleanup = blktap2_colo_cleanup,
+    /* per-disk hooks: connect to / close the control socket */
+    .setup = blktap2_colo_setup,
+    .teardown = blktap2_colo_teardown,
+    /* per-checkpoint hooks: send the request, then wait for the response */
+    .postsuspend = blktap2_colo_postsuspend,
+    .commit = blktap2_colo_commit,
+};
diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c
index 46bd02d..d1facef 100644
--- a/tools/libxl/libxl_create.c
+++ b/tools/libxl/libxl_create.c
@@ -831,6 +831,13 @@ static void initiate_domain_create(libxl__egc *egc,
     for (i = 0; i < d_config->num_disks; i++) {
         ret = libxl__device_disk_setdefault(gc, &d_config->disks[i]);
         if (ret) goto error_out;
+
+        /* TODO: cleanup it when destroying the domain */
+        if (d_config->disks[i].backend == LIBXL_DISK_BACKEND_TAP &&
+            (d_config->disks[i].format == LIBXL_DISK_FORMAT_REMUS ||
+             d_config->disks[i].format == LIBXL_DISK_FORMAT_COLO))
+            libxl__blktap_devpath(gc, d_config->disks[i].pdev_path,
+                                  d_config->disks[i].format);
     }
 
     dcs->bl.ao = ao;
diff --git a/tools/libxl/libxl_noblktap2.c b/tools/libxl/libxl_noblktap2.c
index 38696ec..46207b3 100644
--- a/tools/libxl/libxl_noblktap2.c
+++ b/tools/libxl/libxl_noblktap2.c
@@ -39,6 +39,35 @@ libxl_disk_format libxl__blktap_get_real_format(const char *disk,
     return format;
 }
 
+/*
+ * Subkind-wide init hook for builds without blktap2: nothing to set up,
+ * always returns 0.  The parameter is the devices state, matching the
+ * signature of the blktap2-enabled implementation of this hook.
+ */
+static int blktap2_colo_init(libxl__checkpoint_devices_state *cds)
+{
+    return 0;
+}
+
+/*
+ * Subkind-wide cleanup hook for builds without blktap2: nothing to release.
+ * The parameter is the devices state, matching the signature of the
+ * blktap2-enabled implementation of this hook.
+ */
+static void blktap2_colo_cleanup(libxl__checkpoint_devices_state *cds)
+{
+}
+
+/*
+ * Per-device setup hook for builds without blktap2.  Disk replication is
+ * not available, so every device is rejected: ERROR_FAIL is stored in
+ * dev->aodev.rc and dev->aodev.callback() is invoked.
+ *
+ * The parameter must be named dev because the body refers to it by that
+ * name.
+ */
+static void blktap2_colo_setup(libxl__checkpoint_device *dev)
+{
+    dev->aodev.rc = ERROR_FAIL;
+    dev->aodev.callback(dev->cds->egc, &dev->aodev);
+}
+
+/*
+ * Per-device teardown hook for builds without blktap2.  Does nothing and
+ * does not invoke dev->aodev.callback().
+ * NOTE(review): presumably never reached, because setup in this file always
+ * fails -- confirm against the checkpoint device layer.
+ */
+static void blktap2_colo_teardown(libxl__checkpoint_device *dev)
+{
+}
+
+/*
+ * Stub operations table used when libxl is built without blktap2.  It
+ * provides the same symbol as the blktap2-enabled build; .postsuspend and
+ * .commit are left unset, and setup rejects every device with ERROR_FAIL.
+ */
+const libxl__checkpoint_device_subkind_ops colo_save_device_blktap2_disk = {
+    .kind = LIBXL__CHECKPOINT_DEVICE_DISK,
+    .init = blktap2_colo_init,
+    .cleanup = blktap2_colo_cleanup,
+    .setup = blktap2_colo_setup,
+    .teardown = blktap2_colo_teardown,
+};
+
 /*
  * Local variables:
  * mode: C
-- 
1.9.3

  parent reply	other threads:[~2014-08-08  7:01 UTC|newest]

Thread overview: 64+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-08-08  7:00 [RFC Patch v2 00/45] COarse-grain LOck-stepping Virtual Machines for Non-stop Service Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 01/45] copy the correct page to memory Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 02/45] csum the correct page Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 03/45] don't zero out ioreq page Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 04/45] Refactor domain_suspend_callback_common() Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 05/45] Update libxl__domain_resume() for colo Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 06/45] Update libxl__domain_suspend_common_switch_qemu_logdirty() " Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 07/45] Introduce a new internal API libxl__domain_unpause() Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 08/45] Update libxl__domain_unpause() to support qemu-xen Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 09/45] support to resume uncooperative HVM guests Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 10/45] update datecopier to support sending data only Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 11/45] introduce a new API to aync read data from fd Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 12/45] move remus related codes to libxl_remus.c Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 13/45] rename remus device to checkpoint device Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 14/45] adjust the indentation Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 15/45] don't touch remus in checkpoint_device Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 16/45] Update libxl_save_msgs_gen.pl to support return data from xl to xc Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 17/45] Allow slave sends data to master Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 18/45] secondary vm suspend/resume/checkpoint code Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 19/45] primary vm suspend/get_dirty_pfn/resume/checkpoint code Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 20/45] xc_domain_save: flush cache before calling callbacks->postcopy() in colo mode Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 21/45] COLO: xc related codes Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 22/45] send store mfn and console mfn to xl before resuming secondary vm Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 23/45] implement the cmdline for COLO Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 24/45] HACK: do checkpoint per 20ms Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 25/45] colo: dynamic allocate aio_requests to avoid -EBUSY error Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 26/45] fix memory leak in block-remus Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 27/45] pass uuid to the callback td_open Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 28/45] return the correct dev path Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 29/45] blktap2: use correct way to get remus_image Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 30/45] don't call client_flush() when switching to unprotected mode Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 31/45] remus: fix bug in tdremus_close() Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 32/45] blktap2: use correct way to get free event id Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 33/45] blktap2: don't return negative " Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 34/45] blktap2: use correct way to define array Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 35/45] blktap2: connect to backup asynchronously Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 36/45] switch to unprotected mode before closing Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 37/45] blktap2: move async connect related codes to block-replication.c Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 38/45] blktap2: move ramdisk " Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 39/45] block-colo: implement colo disk replication Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 40/45] pass correct file to qemu if we use blktap2 Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 41/45] support blktap remus in xl Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 42/45] support blktap colo in xl: Wen Congyang
2014-08-08  7:01 ` [RFC Patch v2 43/45] update libxl__device_disk_from_xs_be() to support blktap device Wen Congyang
2014-08-08  7:01 ` Wen Congyang [this message]
2014-08-08  7:01 ` [RFC Patch v2 45/45] x86/hvm: Always set pending event injection when loading VMC[BS] state Wen Congyang
2014-08-08  7:24   ` Jan Beulich
2014-08-08  7:29     ` Wen Congyang
2014-08-26 16:02   ` Jan Beulich
2014-08-27  0:46     ` Wen Congyang
2014-08-27 14:58       ` Aravind Gopalakrishnan
2014-08-28  1:04         ` Wen Congyang
2014-08-28  8:54           ` Andrew Cooper
2014-08-28 11:17             ` Wen Congyang
2014-08-28 11:31               ` Paul Durrant
2014-08-29  5:59                 ` Wen Congyang
2014-08-28  9:53         ` Tim Deegan
2014-08-27 23:24     ` Tian, Kevin
2014-08-27 15:02   ` Andrew Cooper
2014-08-08  7:01 ` [RFC Patch v2 46/45] Introduce "xen-load-devices-state" Wen Congyang
2014-08-08  7:19 ` [RFC Patch v2 00/45] COarse-grain LOck-stepping Virtual Machines for Non-stop Service Jan Beulich
2014-08-08  7:39   ` Wen Congyang
2014-08-08  8:21   ` Wen Congyang
2014-08-08  9:02     ` Jan Beulich

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1407481305-19808-45-git-send-email-wency@cn.fujitsu.com \
    --to=wency@cn.fujitsu.com \
    --cc=Ian.Campbell@citrix.com \
    --cc=Ian.Jackson@eu.citrix.com \
    --cc=eddie.dong@intel.com \
    --cc=laijs@cn.fujitsu.com \
    --cc=xen-devel@lists.xen.org \
    --cc=yanghy@cn.fujitsu.com \
    --cc=yunhong.jiang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).