All of lore.kernel.org
 help / color / mirror / Atom feed
From: Bharata B Rao <bharata@linux.vnet.ibm.com>
To: qemu-devel@nongnu.org
Cc: Anand Avati <aavati@redhat.com>,
	Amar Tumballi <amarts@redhat.com>,
	Vijay Bellur <vbellur@redhat.com>
Subject: [Qemu-devel] [RFC PATCH 2/2] block: gluster as block backend
Date: Sat, 21 Jul 2012 14:01:59 +0530	[thread overview]
Message-ID: <20120721083159.GE1046@in.ibm.com> (raw)
In-Reply-To: <20120721082917.GC1046@in.ibm.com>

block: gluster as block backend

From: Bharata B Rao <bharata@linux.vnet.ibm.com>

This patch adds gluster as a new block backend in QEMU, giving QEMU the
ability to boot VM images stored on gluster volumes.

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
---

 block/Makefile.objs |    1 
 block/gluster.c     |  483 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 484 insertions(+), 0 deletions(-)
 create mode 100644 block/gluster.c


diff --git a/block/Makefile.objs b/block/Makefile.objs
index b5754d3..a1ae67f 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -9,3 +9,4 @@ block-obj-$(CONFIG_POSIX) += raw-posix.o
 block-obj-$(CONFIG_LIBISCSI) += iscsi.o
 block-obj-$(CONFIG_CURL) += curl.o
 block-obj-$(CONFIG_RBD) += rbd.o
+block-obj-$(CONFIG_GLUSTERFS) += gluster.o
diff --git a/block/gluster.c b/block/gluster.c
new file mode 100644
index 0000000..c33a006
--- /dev/null
+++ b/block/gluster.c
@@ -0,0 +1,483 @@
+/*
+ * GlusterFS backend for QEMU
+ *
+ * (AIO implementation is derived from block/rbd.c)
+ *
+ * Copyright (C) 2012 Bharata B Rao <bharata@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+#include "block_int.h"
+#include <glusterfs/api/glfs.h>
+
+typedef struct GlusterConf { /* components parsed from a gluster filename */
+    char server[HOST_NAME_MAX]; /* volfile server host name */
+    int port; /* server port; the parser stores 0 when none is given */
+    char volname[128]; /* TODO: use GLUSTERD_MAX_VOLUME_NAME */
+    char image[PATH_MAX]; /* image path handed to glfs_open()/glfs_creat() */
+} GlusterConf;
+
+typedef struct GlusterAIOCB { /* per-request AIO control block */
+    BlockDriverAIOCB common; /* generic part; supplies cb and opaque */
+    QEMUIOVector *qiov; /* caller's I/O vector (set for reads/writes) */
+    char *bounce; /* not referenced by any code in this patch */
+    struct BDRVGlusterState *s; /* driver state that issued the request */
+    int cancelled; /* non-zero: completion skips the caller's callback */
+} GlusterAIOCB;
+
+typedef struct GlusterCBKData { /* context for gluster's completion callback */
+    GlusterAIOCB *acb; /* request being completed */
+    struct BDRVGlusterState *s; /* state whose pipe carries the notification */
+    int64_t size; /* bytes requested; 0 for a flush */
+    int ret; /* result value passed in by the gluster callback */
+} GlusterCBKData;
+
+typedef struct BDRVGlusterState { /* per-device driver state (bs->opaque) */
+    struct glfs *glfs; /* handle returned by qemu_gluster_init() */
+    int fds[2]; /* completion-notification pipe; index with GLUSTER_FD_* */
+    int open_flags; /* O_* flags passed to glfs_open() */
+    struct glfs_fd *fd; /* open image file */
+    int qemu_aio_count; /* requests submitted and not yet completed */
+    int event_reader_pos; /* bytes of event_gcbk read from the pipe so far */
+    GlusterCBKData *event_gcbk; /* pointer value being read from the pipe */
+} BDRVGlusterState;
+
+#define GLUSTER_FD_READ 0 /* fds[] index of the pipe's read end */
+#define GLUSTER_FD_WRITE 1 /* fds[] index of the pipe's write end */
+
+/*
+ * Finish one request whose gluster callback has fired.
+ *
+ * Unless the request was cancelled, the gluster result is translated into
+ * a QEMU status and handed to the caller's completion callback:
+ *   - result equals the requested size: 0
+ *   - negative result:                  that result
+ *   - anything else (short transfer):   -EINVAL
+ *
+ * The AIOCB is released and gcbk is freed in every case.
+ */
+static void qemu_gluster_complete_aio(GlusterCBKData *gcbk)
+{
+    GlusterAIOCB *request = gcbk->acb;
+
+    if (!request->cancelled) {
+        int status = -EINVAL; /* Partial read/write - fail it */
+
+        if (gcbk->ret == gcbk->size) {
+            status = 0; /* Success */
+        } else if (gcbk->ret < 0) {
+            status = gcbk->ret; /* Read/Write failed */
+        }
+        request->common.cb(request->common.opaque, status);
+    }
+
+    qemu_aio_release(request);
+    g_free(gcbk);
+}
+
+/*
+ * Handler for the read end of the notification pipe.
+ *
+ * Each completion is delivered as the raw bytes of a GlusterCBKData
+ * pointer. The bytes are accumulated in s->event_gcbk (tracking progress in
+ * s->event_reader_pos); once a whole pointer has arrived the request is
+ * completed and the in-flight counter is decremented.
+ */
+static void qemu_gluster_aio_event_reader(void *opaque)
+{
+    BDRVGlusterState *s = opaque;
+    ssize_t nread;
+
+    /* Retry only when the read was interrupted by a signal. */
+    for (;;) {
+        char *dst = (char *)&s->event_gcbk + s->event_reader_pos;
+        size_t want = sizeof(s->event_gcbk) - s->event_reader_pos;
+
+        nread = read(s->fds[GLUSTER_FD_READ], dst, want);
+        if (nread >= 0 || errno != EINTR) {
+            break;
+        }
+    }
+    if (nread <= 0) {
+        return;
+    }
+
+    s->event_reader_pos += nread;
+    if (s->event_reader_pos != sizeof(s->event_gcbk)) {
+        return; /* pointer only partially received so far */
+    }
+
+    s->event_reader_pos = 0;
+    qemu_gluster_complete_aio(s->event_gcbk);
+    s->qemu_aio_count--;
+}
+
+/*
+ * Flush callback registered with the AIO fd handler: returns 1 while at
+ * least one request is still in flight, 0 otherwise.
+ */
+static int qemu_gluster_aio_flush_cb(void *opaque)
+{
+    BDRVGlusterState *state = opaque;
+    int in_flight = state->qemu_aio_count;
+
+    return in_flight > 0 ? 1 : 0;
+}
+
+/*
+ * Copy the NUL-terminated string src into dst, which holds dst_size bytes.
+ * Returns 0 on success, or -1 (leaving dst untouched) when src plus its
+ * terminator does not fit.
+ */
+static int qemu_gluster_copy_field(char *dst, size_t dst_size, const char *src)
+{
+    size_t len = strlen(src);
+
+    if (len >= dst_size) {
+        return -1;
+    }
+    memcpy(dst, src, len + 1);
+    return 0;
+}
+
+/*
+ * Parse a filename of the form
+ *
+ *     file=protocol:server[@port]:volname:image
+ *
+ * into its components. The protocol token is discarded. When no port is
+ * given, c->port is set to 0. Tokens are split on ':', so the image
+ * component ends at the next ':' if one is present.
+ *
+ * Returns 0 on success or -EINVAL when a component is missing, a component
+ * does not fit in its GlusterConf buffer, or the port is not a decimal
+ * number in the range 0..65535. On failure the contents of *c are
+ * unspecified.
+ */
+static int qemu_gluster_parsename(GlusterConf *c, const char *filename)
+{
+    char *file = g_strdup(filename);
+    char *token, *saveptr;
+    char *port_str;
+    int ret = -EINVAL;
+
+    /* Discard the protocol */
+    token = strtok_r(file, ":", &saveptr);
+    if (!token) {
+        goto out;
+    }
+
+    /* server[@port] */
+    token = strtok_r(NULL, ":", &saveptr);
+    if (!token) {
+        goto out;
+    }
+    port_str = strchr(token, '@');
+    if (port_str) {
+        char *end;
+        long port;
+
+        *port_str++ = '\0'; /* split "server@port" in place */
+        port = strtol(port_str, &end, 10);
+        if (end == port_str || *end != '\0' || port < 0 || port > 65535) {
+            goto out;
+        }
+        c->port = (int)port;
+    } else {
+        c->port = 0;
+    }
+    /*
+     * c is not zero-initialised by the callers, so every string must be
+     * stored with its terminator; over-long components are rejected
+     * rather than silently truncated.
+     */
+    if (token[0] == '\0' ||
+        qemu_gluster_copy_field(c->server, sizeof(c->server), token) < 0) {
+        goto out;
+    }
+
+    /* volname */
+    token = strtok_r(NULL, ":", &saveptr);
+    if (!token ||
+        qemu_gluster_copy_field(c->volname, sizeof(c->volname), token) < 0) {
+        goto out;
+    }
+
+    /* image */
+    token = strtok_r(NULL, ":", &saveptr);
+    if (!token ||
+        qemu_gluster_copy_field(c->image, sizeof(c->image), token) < 0) {
+        goto out;
+    }
+    ret = 0;
+out:
+    g_free(file);
+    return ret;
+}
+
+/*
+ * Parse filename into *c and bring up a gluster connection for the volume
+ * it names.
+ *
+ * Returns the initialised glfs handle, or NULL on failure. On failure
+ * errno is guaranteed to be non-zero, because both callers convert it
+ * into their return value with -errno.
+ */
+static struct glfs *qemu_gluster_init(GlusterConf *c, const char *filename)
+{
+    struct glfs *glfs = NULL;
+    int ret;
+    int saved_errno;
+
+    ret = qemu_gluster_parsename(c, filename);
+    if (ret < 0) {
+        errno = -ret;
+        goto out;
+    }
+
+    glfs = glfs_new(c->volname);
+    if (!glfs) {
+        goto out;
+    }
+
+    ret = glfs_set_volfile_server(glfs, "socket", c->server, c->port);
+    if (ret < 0) {
+        goto out;
+    }
+
+    /*
+     * TODO: Logging is not strictly necessary, but it is nice to have.
+     * Can QEMU optionally log into a standard place?
+     * Need to use defines like gf_loglevel_t:GF_LOG_INFO instead of
+     * hard coded values like 7 here.
+     */
+    ret = glfs_set_logging(glfs, "/tmp/qemu-gluster.log", 7);
+    if (ret < 0) {
+        goto out;
+    }
+
+    ret = glfs_init(glfs);
+    if (ret < 0) {
+        goto out;
+    }
+    return glfs;
+
+out:
+    /* glfs_fini() may overwrite errno; keep the original failure cause. */
+    saved_errno = errno;
+    if (glfs) {
+        (void)glfs_fini(glfs);
+    }
+    /* Never report failure with errno == 0: callers would return success. */
+    errno = saved_errno ? saved_errno : EIO;
+    return NULL;
+}
+
+/*
+ * Open the gluster image named by filename for the given block device.
+ *
+ * Connects to the volume, opens the image with flags derived from
+ * bdrv_flags (read-only vs read-write, O_DIRECT for BDRV_O_NOCACHE),
+ * creates the non-blocking notification pipe and registers its read end
+ * with the AIO fd handler.
+ *
+ * Returns 0 on success or a negative errno value. On failure everything
+ * acquired so far is released and the state pointers are reset.
+ */
+static int qemu_gluster_open(BlockDriverState *bs, const char *filename,
+    int bdrv_flags)
+{
+    BDRVGlusterState *s = bs->opaque;
+    GlusterConf *c = g_malloc(sizeof(GlusterConf));
+    int ret;
+
+    s->glfs = qemu_gluster_init(c, filename);
+    if (!s->glfs) {
+        ret = -errno;
+        goto out;
+    }
+
+    s->open_flags |=  O_BINARY;
+    s->open_flags &= ~O_ACCMODE;
+    if (bdrv_flags & BDRV_O_RDWR) {
+        s->open_flags |= O_RDWR;
+    } else {
+        s->open_flags |= O_RDONLY;
+    }
+
+    if ((bdrv_flags & BDRV_O_NOCACHE)) {
+        s->open_flags |= O_DIRECT;
+    }
+
+    s->fd = glfs_open(s->glfs, c->image, s->open_flags);
+    if (!s->fd) {
+        ret = -errno;
+        goto out;
+    }
+
+    ret = qemu_pipe(s->fds);
+    if (ret < 0) {
+        /*
+         * Report the real cause rather than the raw return value.
+         * NOTE(review): assumes qemu_pipe() follows pipe(2) and sets errno
+         * on failure - confirm against its definition.
+         */
+        ret = -errno;
+        goto out;
+    }
+    fcntl(s->fds[0], F_SETFL, O_NONBLOCK);
+    fcntl(s->fds[1], F_SETFL, O_NONBLOCK);
+    qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ],
+        qemu_gluster_aio_event_reader, NULL, qemu_gluster_aio_flush_cb, s);
+    g_free(c);
+    return 0;
+
+out:
+    g_free(c);
+    /* Reset the pointers so nothing can reuse or re-release them later. */
+    if (s->fd) {
+        glfs_close(s->fd);
+        s->fd = NULL;
+    }
+    if (s->glfs) {
+        (void) glfs_fini(s->glfs);
+        s->glfs = NULL;
+    }
+    return ret;
+}
+
+/*
+ * Create (or truncate) the image named by filename and size it according
+ * to the BLOCK_OPT_SIZE creation option, rounded down to a whole number of
+ * sectors. Without that option the image is truncated to zero length.
+ *
+ * Returns 0 on success or a negative errno value.
+ */
+static int qemu_gluster_create(const char *filename,
+        QEMUOptionParameter *options)
+{
+    GlusterConf *conf = g_malloc(sizeof(GlusterConf));
+    struct glfs *glfs;
+    struct glfs_fd *fd;
+    int64_t nb_sectors = 0;
+    int ret = 0;
+
+    glfs = qemu_gluster_init(conf, filename);
+    if (!glfs) {
+        ret = -errno;
+        g_free(conf);
+        return ret;
+    }
+
+    /* Read out options */
+    for (; options && options->name; options++) {
+        if (strcmp(options->name, BLOCK_OPT_SIZE) == 0) {
+            nb_sectors = options->value.n / BDRV_SECTOR_SIZE;
+        }
+    }
+
+    fd = glfs_creat(glfs, conf->image,
+                    O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, S_IRWXU);
+    if (fd) {
+        if (glfs_ftruncate(fd, nb_sectors * BDRV_SECTOR_SIZE) != 0) {
+            ret = -errno;
+        }
+        if (glfs_close(fd) != 0) {
+            ret = -errno;
+        }
+    } else {
+        ret = -errno;
+    }
+
+    g_free(conf);
+    (void) glfs_fini(glfs);
+    return ret;
+}
+
+static AIOPool gluster_aio_pool = { /* pool passed to qemu_aio_get() below */
+    .aiocb_size = sizeof(GlusterAIOCB), /* only field set; no other handlers */
+};
+
+/*
+ * Write the value of the gcbk pointer to the notification pipe.
+ *
+ * Interrupted writes are retried immediately; when the non-blocking pipe
+ * is full (EAGAIN) the function waits in select() until it is writable and
+ * tries again.
+ *
+ * Returns the non-negative result of write() on success, or a negative
+ * value if write() failed for any other reason.
+ */
+static int qemu_gluster_send_pipe(BDRVGlusterState *s, GlusterCBKData *gcbk)
+{
+    int ret;
+
+    for (;;) {
+        int pipe_wr = s->fds[GLUSTER_FD_WRITE];
+        fd_set writable;
+
+        ret = write(pipe_wr, (void *)&gcbk, sizeof(gcbk));
+        if (ret >= 0) {
+            return ret;
+        }
+        if (errno == EINTR) {
+            continue;
+        }
+        if (errno != EAGAIN) {
+            return ret;
+        }
+
+        /* Pipe is full: block until it can take more data. */
+        FD_ZERO(&writable);
+        FD_SET(pipe_wr, &writable);
+        do {
+            ret = select(pipe_wr + 1, NULL, &writable, NULL, NULL);
+        } while (ret < 0 && errno == EINTR);
+    }
+}
+
+/*
+ * Completion callback handed to the gluster async calls.
+ *
+ * Records the result in the callback data and forwards the data pointer
+ * through the notification pipe. If the pipe write fails the process is
+ * aborted.
+ */
+static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
+{
+    GlusterCBKData *done = (GlusterCBKData *)arg;
+
+    done->ret = ret;
+    if (qemu_gluster_send_pipe(done->s, done) >= 0) {
+        return;
+    }
+
+    error_report("Could not complete read/write/flush from gluster");
+    abort();
+}
+
+/*
+ * Submit an asynchronous vectored read (write == 0) or write (write != 0)
+ * of nb_sectors sectors starting at sector_num.
+ *
+ * cb(opaque, status) is invoked from the pipe event reader once gluster
+ * reports completion. Returns the new AIOCB, or NULL if the request could
+ * not be submitted (the callback is then never invoked).
+ */
+static BlockDriverAIOCB *qemu_gluster_aio_rw(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque, int write)
+{
+    int ret;
+    GlusterAIOCB *acb;
+    GlusterCBKData *gcbk;
+    BDRVGlusterState *s = bs->opaque;
+    size_t size;
+    off_t offset;
+
+    acb = qemu_aio_get(&gluster_aio_pool, bs, cb, opaque);
+    acb->qiov = qiov;
+    acb->s = s;
+    /*
+     * The completion path reads acb->cancelled and nothing else in this
+     * file sets it, so initialise the driver-private fields explicitly
+     * rather than relying on what qemu_aio_get() returns.
+     */
+    acb->bounce = NULL;
+    acb->cancelled = 0;
+
+    offset = sector_num * BDRV_SECTOR_SIZE;
+    size = nb_sectors * BDRV_SECTOR_SIZE;
+    s->qemu_aio_count++;
+
+    gcbk = g_malloc(sizeof(GlusterCBKData));
+    gcbk->acb = acb;
+    gcbk->s = s;
+    gcbk->size = size;
+
+    if (write) {
+        ret = glfs_pwritev_async(s->fd, qiov->iov, qiov->niov, offset, 0,
+            &gluster_finish_aiocb, gcbk);
+    } else {
+        ret = glfs_preadv_async(s->fd, qiov->iov, qiov->niov, offset, 0,
+            &gluster_finish_aiocb, gcbk);
+    }
+
+    if (ret < 0) {
+        goto out;
+    }
+    return &acb->common;
+
+out:
+    /* Submission failed: undo the bookkeeping done above. */
+    g_free(gcbk);
+    s->qemu_aio_count--;
+    qemu_aio_release(acb);
+    return NULL;
+}
+
+/* Asynchronous vectored read: qemu_gluster_aio_rw() with write == 0. */
+static BlockDriverAIOCB *qemu_gluster_aio_readv(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    const int is_write = 0;
+
+    return qemu_gluster_aio_rw(bs, sector_num, qiov, nb_sectors, cb, opaque,
+                               is_write);
+}
+
+/* Asynchronous vectored write: qemu_gluster_aio_rw() with write == 1. */
+static BlockDriverAIOCB *qemu_gluster_aio_writev(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    const int is_write = 1;
+
+    return qemu_gluster_aio_rw(bs, sector_num, qiov, nb_sectors, cb, opaque,
+                               is_write);
+}
+
+/*
+ * Submit an asynchronous fsync of the image.
+ *
+ * cb(opaque, status) is invoked from the pipe event reader once gluster
+ * reports completion. Returns the new AIOCB, or NULL if the request could
+ * not be submitted (the callback is then never invoked).
+ */
+static BlockDriverAIOCB *qemu_gluster_aio_flush(BlockDriverState *bs,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    int ret;
+    GlusterAIOCB *acb;
+    GlusterCBKData *gcbk;
+    BDRVGlusterState *s = bs->opaque;
+
+    acb = qemu_aio_get(&gluster_aio_pool, bs, cb, opaque);
+    acb->s = s;
+    /*
+     * The completion path reads acb->cancelled and nothing else in this
+     * file sets it, so initialise the driver-private fields explicitly
+     * rather than relying on what qemu_aio_get() returns. A flush carries
+     * no data, hence no I/O vector.
+     */
+    acb->qiov = NULL;
+    acb->bounce = NULL;
+    acb->cancelled = 0;
+    s->qemu_aio_count++;
+
+    gcbk = g_malloc(sizeof(GlusterCBKData));
+    gcbk->acb = acb;
+    gcbk->s = s;
+    gcbk->size = 0; /* a successful fsync is expected to report 0 */
+
+    ret = glfs_fsync_async(s->fd, &gluster_finish_aiocb, gcbk);
+    if (ret < 0) {
+        goto out;
+    }
+    return &acb->common;
+
+out:
+    /* Submission failed: undo the bookkeeping done above. */
+    g_free(gcbk);
+    s->qemu_aio_count--;
+    qemu_aio_release(acb);
+    return NULL;
+}
+
+/*
+ * Return the size of the open image in bytes as reported by glfs_fstat(),
+ * or a negative errno value if the stat call fails.
+ */
+static int64_t qemu_gluster_getlength(BlockDriverState *bs)
+{
+    BDRVGlusterState *state = bs->opaque;
+    struct stat info;
+
+    if (glfs_fstat(state->fd, &info) < 0) {
+        return -errno;
+    }
+    return info.st_size;
+}
+
+/*
+ * Release everything qemu_gluster_open() acquired: the AIO fd handler, the
+ * notification pipe, the open image and the gluster connection.
+ *
+ * NOTE(review): assumes this is only called for a device whose open
+ * succeeded, so that s->fds holds valid descriptors - confirm against the
+ * block layer.
+ */
+static void qemu_gluster_close(BlockDriverState *bs)
+{
+    BDRVGlusterState *s = bs->opaque;
+
+    /* Unregister the handler before its descriptor is closed. */
+    qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL, NULL);
+    close(s->fds[GLUSTER_FD_READ]);
+    close(s->fds[GLUSTER_FD_WRITE]);
+
+    if (s->fd) {
+        glfs_close(s->fd);
+        s->fd = NULL;
+    }
+    /* The connection created in open must be torn down too, or it leaks. */
+    if (s->glfs) {
+        (void) glfs_fini(s->glfs);
+        s->glfs = NULL;
+    }
+}
+
+static QEMUOptionParameter qemu_gluster_create_options[] = { /* create-time options */
+    {
+        .name = BLOCK_OPT_SIZE, /* the only option qemu_gluster_create() reads */
+        .type = OPT_SIZE,
+        .help = "Virtual disk size"
+    },
+    { NULL } /* terminator: option scanning stops at a NULL name */
+};
+
+static BlockDriver bdrv_gluster = { /* driver table for the "gluster" protocol */
+    .format_name = "gluster",
+    .protocol_name = "gluster",
+    .instance_size = sizeof(BDRVGlusterState), /* size of bs->opaque */
+    .bdrv_file_open = qemu_gluster_open,
+    .bdrv_close = qemu_gluster_close,
+    .bdrv_create = qemu_gluster_create,
+    .bdrv_getlength = qemu_gluster_getlength,
+
+    .bdrv_aio_readv = qemu_gluster_aio_readv,
+    .bdrv_aio_writev = qemu_gluster_aio_writev,
+    .bdrv_aio_flush = qemu_gluster_aio_flush,
+
+    .create_options = qemu_gluster_create_options,
+};
+
+static void bdrv_gluster_init(void) /* registers the gluster driver table */
+{
+    bdrv_register(&bdrv_gluster);
+}
+
+block_init(bdrv_gluster_init); /* presumably runs the init at startup - confirm in block_init's definition */

  parent reply	other threads:[~2012-07-21  8:31 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-07-21  8:29 [Qemu-devel] [RFC PATCH 0/2] GlusterFS support in QEMU - v2 Bharata B Rao
2012-07-21  8:30 ` [Qemu-devel] [RFC PATCH 1/2] qemu: Add a config option for GlusterFS as block backend Bharata B Rao
2012-07-21  8:31 ` Bharata B Rao [this message]
2012-07-22 15:38   ` [Qemu-devel] [RFC PATCH 2/2] block: gluster " Stefan Hajnoczi
2012-07-23  8:32     ` Bharata B Rao
2012-07-23  9:06       ` Stefan Hajnoczi
2012-07-21 12:22 ` [Qemu-devel] [RFC PATCH 0/2] GlusterFS support in QEMU - v2 Vijay Bellur
2012-07-21 13:04   ` Bharata B Rao
2012-07-22 14:42 ` Stefan Hajnoczi
2012-07-23  8:50   ` Bharata B Rao
2012-07-23  9:20     ` Stefan Hajnoczi
2012-07-23  9:34       ` ronnie sahlberg
2012-07-23  9:35         ` Stefan Hajnoczi
2012-07-23 14:34       ` Eric Blake
2012-07-24  3:34         ` Bharata B Rao
2012-07-24 10:24       ` Kevin Wolf
2012-07-24 11:30       ` Markus Armbruster
2012-07-23  9:36     ` Vijay Bellur
2012-07-23  9:16 ` Daniel P. Berrange
2012-07-23  9:28   ` ronnie sahlberg

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20120721083159.GE1046@in.ibm.com \
    --to=bharata@linux.vnet.ibm.com \
    --cc=aavati@redhat.com \
    --cc=amarts@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=vbellur@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.