From: Bharata B Rao <bharata@linux.vnet.ibm.com>
To: qemu-devel@nongnu.org
Cc: Amar Tumballi <amarts@redhat.com>, Vijay Bellur <vbellur@redhat.com>
Subject: [Qemu-devel] [RFC PATCH 3/3] block: gluster as block backend
Date: Mon, 11 Jun 2012 19:51:44 +0530 [thread overview]
Message-ID: <20120611142144.GD2737@in.ibm.com> (raw)
In-Reply-To: <20120611141806.GA2737@in.ibm.com>
block: gluster as block backend
From: Bharata B Rao <bharata@linux.vnet.ibm.com>
This patch adds gluster as a new block backend in QEMU. This gives QEMU
the ability to boot VM images from gluster volumes.
Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
---
Makefile.objs | 2
block/gluster.c | 435 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 436 insertions(+), 1 deletions(-)
create mode 100644 block/gluster.c
diff --git a/Makefile.objs b/Makefile.objs
index 25190ba..859b88a 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -59,7 +59,7 @@ block-nested-$(CONFIG_POSIX) += raw-posix.o
block-nested-$(CONFIG_LIBISCSI) += iscsi.o
block-nested-$(CONFIG_CURL) += curl.o
block-nested-$(CONFIG_RBD) += rbd.o
-block-nested-$(CONFIG_GLUSTERFS) += gluster-helpers.o
+block-nested-$(CONFIG_GLUSTERFS) += gluster-helpers.o gluster.o
block-obj-y += $(addprefix block/, $(block-nested-y))
diff --git a/block/gluster.c b/block/gluster.c
new file mode 100644
index 0000000..1566cb7
--- /dev/null
+++ b/block/gluster.c
@@ -0,0 +1,435 @@
+/*
+ * GlusterFS backend for QEMU
+ *
+ * (AIO implementation is derived from block/rbd.c)
+ *
+ * Copyright (C) 2012 Bharata B Rao <bharata@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+#include "block_int.h"
+#include "gluster-helpers.h"
+
+typedef void *gluster_file_t; /* opaque file handle, as returned by gluster_open()/gluster_creat() */
+
+typedef struct glusterConf { /* fields parsed from a "protocol:volfile:image" filename */
+ char volfile[PATH_MAX]; /* second ':'-separated field; passed to gluster_init() */
+ char image[PATH_MAX]; /* third ':'-separated field; passed to gluster_open()/gluster_creat() */
+} glusterConf;
+
+typedef struct BDRVGlusterState { /* per-device driver state, reached through bs->opaque */
+ int fds[2]; /* pipe from qemu_pipe(); indexed by GLUSTER_FD_READ / GLUSTER_FD_WRITE */
+ int open_flags; /* O_* flags handed to gluster_open() */
+ gluster_file_t fd; /* handle of the opened image, NULL once closed */
+ glusterfs_ctx_t *ctx; /* result of gluster_init() for the volfile */
+ int qemu_aio_count; /* requests submitted but not yet consumed by the event reader */
+ int event_reader_pos; /* bytes of event_gaiocb received from the pipe so far */
+ gluster_aiocb_t *event_gaiocb; /* pointer value being reassembled from the pipe */
+} BDRVGlusterState;
+
+typedef struct glusterAIOCB { /* one in-flight read or write request */
+ BlockDriverAIOCB common; /* carries the completion callback (cb) and its opaque */
+ QEMUBH *bh; /* bottom half that delivers the completion */
+ QEMUIOVector *qiov; /* caller's I/O vector */
+ int ret; /* request result; values > 0 are reported to the callback as 0 */
+ int write; /* non-zero for a write, zero for a read */
+ char *bounce; /* linear buffer from qemu_blockalign() used for the actual I/O */
+ BDRVGlusterState *s; /* owning driver state */
+ int cancelled; /* when set, completion frees the request without calling back */
+ int error; /* set to 1 once the request has failed */
+} glusterAIOCB;
+
+#define GLUSTER_FD_READ 0 /* index into BDRVGlusterState.fds: end the event reader read()s from */
+#define GLUSTER_FD_WRITE 1 /* index into BDRVGlusterState.fds: end completions are write()n to */
+
+/*
+ * Copy the NUL-terminated string @src into the PATH_MAX-sized buffer @dst.
+ *
+ * Returns 0 on success or -ENAMETOOLONG when @src (including its terminator)
+ * does not fit; @dst is left untouched in that case.
+ */
+static int qemu_gluster_copy_path(char *dst, const char *src)
+{
+    size_t len = strlen(src);
+
+    if (len >= PATH_MAX) {
+        return -ENAMETOOLONG;
+    }
+    memcpy(dst, src, len + 1);
+    return 0;
+}
+
+/*
+ * Split a filename of the form
+ *
+ *     file=protocol:volfile:image
+ *
+ * into its volfile and image components.
+ *
+ * @c:        destination; c->volfile and c->image are filled in on success
+ * @filename: the "protocol:volfile:image" string; it is not modified
+ *
+ * Returns 0 on success, -EINVAL when @filename is NULL or one of the three
+ * ':'-separated fields is missing, and -ENAMETOOLONG when a field would not
+ * fit in its PATH_MAX buffer together with the terminating NUL (the previous
+ * strncpy() silently left the buffer unterminated in that case).
+ */
+static int qemu_gluster_parsename(glusterConf *c, const char *filename)
+{
+    char *file;
+    char *token, *saveptr = NULL;
+    int ret;
+
+    if (!filename) {
+        return -EINVAL;
+    }
+
+    /* strtok_r() writes into its input, so work on a private copy */
+    file = g_strdup(filename);
+
+    /* Discard the protocol */
+    token = strtok_r(file, ":", &saveptr);
+    if (!token) {
+        ret = -EINVAL;
+        goto out;
+    }
+
+    /* volfile */
+    token = strtok_r(NULL, ":", &saveptr);
+    if (!token) {
+        ret = -EINVAL;
+        goto out;
+    }
+    ret = qemu_gluster_copy_path(c->volfile, token);
+    if (ret < 0) {
+        goto out;
+    }
+
+    /* image */
+    token = strtok_r(NULL, ":", &saveptr);
+    if (!token) {
+        ret = -EINVAL;
+        goto out;
+    }
+    ret = qemu_gluster_copy_path(c->image, token);
+out:
+    g_free(file);
+    return ret;
+}
+
+/*
+ * Bottom half that finishes a request.
+ *
+ * For reads the bounce buffer is copied back into the caller's I/O vector.
+ * The bounce buffer is then freed, the completion callback is invoked (any
+ * positive result is reported as 0), and the bottom half and the request
+ * itself are released.
+ */
+static void gluster_aio_bh_cb(void *opaque)
+{
+    glusterAIOCB *req = opaque;
+    int status;
+
+    if (req->write == 0) {
+        qemu_iovec_from_buffer(req->qiov, req->bounce, req->qiov->size);
+    }
+    qemu_vfree(req->bounce);
+
+    status = req->ret;
+    if (status > 0) {
+        status = 0;
+    }
+    req->common.cb(req->common.opaque, status);
+
+    qemu_bh_delete(req->bh);
+    req->bh = NULL;
+
+    qemu_aio_release(req);
+}
+
+/*
+ * Turn a finished gluster request into a QEMU completion.
+ *
+ * A cancelled request is simply freed.  Otherwise the gluster result is
+ * folded into acb->ret / acb->error (failed or short reads have the unread
+ * part of the buffer zero-filled) and a bottom half is scheduled to run
+ * gluster_aio_bh_cb().  @gaiocb is freed in every case.
+ */
+static void qemu_gluster_complete_aio(gluster_aiocb_t *gaiocb)
+{
+    glusterAIOCB *acb = (glusterAIOCB *)gaiocb->opaque;
+    int64_t result;
+
+    if (acb->cancelled) {
+        qemu_vfree(acb->bounce);
+        qemu_aio_release(acb);
+        g_free(gaiocb);
+        return;
+    }
+
+    result = gaiocb->ret;
+
+    if (result < 0) {
+        /* Failure: a read hands back an all-zero buffer */
+        if (!acb->write) {
+            memset(gaiocb->buf, 0, gaiocb->size);
+        }
+        acb->ret = result;
+        acb->error = 1;
+    } else if (acb->write) {
+        if (!acb->error) {
+            acb->ret = gaiocb->size;
+        }
+    } else if (result < gaiocb->size) {
+        /* Short read: zero the tail and report the full size */
+        memset(gaiocb->buf + result, 0, gaiocb->size - result);
+        if (!acb->error) {
+            acb->ret = gaiocb->size;
+        }
+    } else if (!acb->error) {
+        acb->ret = result;
+    }
+
+    acb->bh = qemu_bh_new(gluster_aio_bh_cb, acb);
+    qemu_bh_schedule(acb->bh);
+    g_free(gaiocb);
+}
+
+/*
+ * Read handler for the read end of the completion pipe.
+ *
+ * Each completion arrives as the raw bytes of a gluster_aiocb_t pointer.
+ * The bytes are accumulated in s->event_gaiocb (a read may deliver only
+ * part of the pointer); once it is complete the request is finished via
+ * qemu_gluster_complete_aio() and the in-flight counter is decremented.
+ * A read interrupted by a signal (EINTR) is retried; any other failure or
+ * an empty read ends the call.
+ */
+static void qemu_gluster_aio_event_reader(void *opaque)
+{
+    BDRVGlusterState *s = opaque;
+    ssize_t nread;
+
+    for (;;) {
+        char *slot = (char *)&s->event_gaiocb;
+
+        nread = read(s->fds[GLUSTER_FD_READ], slot + s->event_reader_pos,
+                     sizeof(s->event_gaiocb) - s->event_reader_pos);
+        if (nread > 0) {
+            break;
+        }
+        if (nread == 0 || errno != EINTR) {
+            return;
+        }
+    }
+
+    s->event_reader_pos += nread;
+    if (s->event_reader_pos != sizeof(s->event_gaiocb)) {
+        return;
+    }
+
+    s->event_reader_pos = 0;
+    qemu_gluster_complete_aio(s->event_gaiocb);
+    s->qemu_aio_count--;
+}
+
+/*
+ * Flush callback registered together with the pipe read handler.
+ *
+ * Returns 1 while at least one request is still in flight, 0 otherwise.
+ */
+static int qemu_gluster_aio_flush_cb(void *opaque)
+{
+    BDRVGlusterState *state = opaque;
+    int in_flight = state->qemu_aio_count;
+
+    return in_flight > 0;
+}
+
+/*
+ * Open the image named by @filename ("protocol:volfile:image").
+ *
+ * Initialises gluster from the volfile, opens the image with flags derived
+ * from @bdrv_flags, and creates the non-blocking pipe through which
+ * completions are delivered to qemu_gluster_aio_event_reader().
+ *
+ * @bs:         block device whose opaque area holds the BDRVGlusterState
+ * @filename:   "protocol:volfile:image" string
+ * @bdrv_flags: BDRV_O_* flags (RDWR, NOCACHE and CACHE_WB are honoured)
+ *
+ * Returns the non-negative result of qemu_pipe() on success and a negative
+ * value on failure.  On failure the image handle is closed only if it was
+ * actually opened, and s->fd is reset so no stale handle is left behind.
+ * NOTE(review): this relies on the driver state starting out zeroed, as the
+ * "|=" on open_flags below already does - confirm against the block layer.
+ */
+static int qemu_gluster_open(BlockDriverState *bs, const char *filename,
+                             int bdrv_flags)
+{
+    BDRVGlusterState *s = bs->opaque;
+    glusterConf *c = g_malloc(sizeof(glusterConf));
+    int ret = -1;
+
+    if (qemu_gluster_parsename(c, filename)) {
+        goto out;
+    }
+
+    s->ctx = gluster_init(c->volfile);
+    if (!s->ctx) {
+        goto out;
+    }
+
+    /* FIX: Server client handshake takes time */
+    sleep(1);
+
+    s->open_flags |= O_BINARY;
+    s->open_flags &= ~O_ACCMODE;
+    if (bdrv_flags & BDRV_O_RDWR) {
+        s->open_flags |= O_RDWR;
+    } else {
+        s->open_flags |= O_RDONLY;
+    }
+
+    /* Use O_DSYNC for write-through caching, no flags for write-back caching,
+     * and O_DIRECT for no caching. */
+    if (bdrv_flags & BDRV_O_NOCACHE) {
+        s->open_flags |= O_DIRECT;
+    }
+    if (!(bdrv_flags & BDRV_O_CACHE_WB)) {
+        s->open_flags |= O_DSYNC;
+    }
+
+    s->fd = gluster_open(c->image, s->open_flags, 0);
+    if (!s->fd) {
+        goto out;
+    }
+
+    ret = qemu_pipe(s->fds);
+    if (ret < 0) {
+        goto out;
+    }
+    fcntl(s->fds[0], F_SETFL, O_NONBLOCK);
+    fcntl(s->fds[1], F_SETFL, O_NONBLOCK);
+    qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ],
+        qemu_gluster_aio_event_reader, NULL, qemu_gluster_aio_flush_cb, s);
+out:
+    g_free(c);
+    /* Only close what was opened; parse/init/open failures leave fd NULL */
+    if (ret < 0 && s->fd) {
+        gluster_close(s->fd);
+        s->fd = NULL;
+    }
+    return ret;
+}
+
+/*
+ * Create a new image named by @filename ("protocol:volfile:image").
+ *
+ * The BLOCK_OPT_SIZE option, rounded down to whole sectors, gives the size
+ * the new file is truncated to.
+ *
+ * Returns 0 on success, the error from qemu_gluster_parsename(), -1 when
+ * gluster_init() fails, or -errno when creating, truncating or closing the
+ * file fails.
+ */
+static int qemu_gluster_create(const char *filename,
+                               QEMUOptionParameter *options)
+{
+    glusterConf *conf = g_malloc(sizeof(glusterConf));
+    gluster_file_t handle;
+    int64_t sectors = 0;
+    int ret;
+
+    ret = qemu_gluster_parsename(conf, filename);
+    if (ret) {
+        goto out;
+    }
+
+    if (!gluster_init(conf->volfile)) {
+        ret = -1;
+        goto out;
+    }
+
+    /* FIX: Server client handshake takes time */
+    sleep(1);
+
+    /* Read out options */
+    for (; options && options->name; options++) {
+        if (strcmp(options->name, BLOCK_OPT_SIZE) == 0) {
+            sectors = options->value.n / BDRV_SECTOR_SIZE;
+        }
+    }
+
+    handle = gluster_creat(conf->image, 0644);
+    if (!handle) {
+        ret = -errno;
+        goto out;
+    }
+
+    if (gluster_ftruncate(handle, sectors * BDRV_SECTOR_SIZE) != 0) {
+        ret = -errno;
+    }
+    if (gluster_close(handle) != 0) {
+        ret = -errno;
+    }
+out:
+    g_free(conf);
+    return ret;
+}
+
+static AIOPool gluster_aio_pool = { /* pool that qemu_aio_get() draws glusterAIOCB requests from */
+ .aiocb_size = sizeof(glusterAIOCB), /* each request is a full glusterAIOCB */
+};
+
+static int qemu_gluster_send_pipe(BDRVGlusterState *s, gluster_aiocb_t *gaiocb)
+{ /*
+ * Hand a finished request to the event reader by writing the value of the
+ * gaiocb pointer itself (sizeof(gaiocb) bytes) to the write end of the pipe.
+ * Returns the result of the last write(): >= 0 on success, negative when the
+ * write failed for a reason other than EINTR or EAGAIN.
+ */
+ int ret = 0;
+ while (1) {
+ fd_set wfd;
+ int fd = s->fds[GLUSTER_FD_WRITE];
+
+ ret = write(fd, (void *)&gaiocb, sizeof(gaiocb));
+ if (ret >= 0) {
+ break;
+ }
+ if (errno == EINTR) { /* interrupted by a signal: just retry the write */
+ continue;
+ }
+ if (errno != EAGAIN) { /* hard error: give up and return the negative result */
+ break;
+ }
+
+ FD_ZERO(&wfd); /* EAGAIN: the non-blocking pipe is full, wait until it is writable */
+ FD_SET(fd, &wfd);
+ do {
+ ret = select(fd + 1, NULL, &wfd, NULL, NULL);
+ } while (ret < 0 && errno == EINTR); /* select()'s result is otherwise ignored; the write is retried */
+ }
+ return ret;
+}
+
+/*
+ * Completion function installed in every gluster_aiocb_t.
+ *
+ * Forwards the finished request to the event reader through the pipe.  If
+ * that fails the gluster_aiocb_t is freed here, since nobody else will
+ * receive it.
+ */
+static void gluster_finish_aiocb(void *arg)
+{
+    gluster_aiocb_t *finished = (gluster_aiocb_t *)arg;
+    glusterAIOCB *acb = (glusterAIOCB *)finished->opaque;
+
+    if (qemu_gluster_send_pipe(acb->s, finished) < 0) {
+        g_free(finished);
+    }
+}
+
+/*
+ * Submit an asynchronous read or write.
+ *
+ * The request is staged through a linear bounce buffer: for writes the
+ * caller's I/O vector is copied into it before submission, for reads it is
+ * copied back out on completion (see gluster_aio_bh_cb()).
+ *
+ * @bs:         block device
+ * @sector_num: first sector of the transfer
+ * @qiov:       caller's I/O vector
+ * @nb_sectors: number of sectors to transfer
+ * @cb:         completion callback
+ * @opaque:     argument for @cb
+ * @write:      non-zero for a write, zero for a read
+ *
+ * Returns the request's BlockDriverAIOCB, or NULL if gluster rejected the
+ * submission; in that case everything allocated here, including the bounce
+ * buffer, is released and @cb is not called.
+ */
+static BlockDriverAIOCB *qemu_gluster_aio_rw(BlockDriverState *bs,
+    int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+    BlockDriverCompletionFunc *cb, void *opaque, int write)
+{
+    int ret;
+    glusterAIOCB *acb;
+    gluster_aiocb_t *gaiocb;
+    BDRVGlusterState *s = bs->opaque;
+
+    acb = qemu_aio_get(&gluster_aio_pool, bs, cb, opaque);
+    acb->write = write;
+    acb->qiov = qiov;
+    acb->bounce = qemu_blockalign(bs, qiov->size);
+    acb->ret = 0;
+    acb->bh = NULL;
+    acb->s = s;
+    /*
+     * Both flags are read by qemu_gluster_complete_aio(), so give them a
+     * defined value for every request instead of relying on whatever the
+     * pool handed back.
+     */
+    acb->cancelled = 0;
+    acb->error = 0;
+
+    if (write) {
+        qemu_iovec_to_buffer(acb->qiov, acb->bounce);
+    }
+
+    s->qemu_aio_count++;
+
+    gaiocb = g_malloc(sizeof(gluster_aiocb_t));
+    gaiocb->opaque = acb;
+    gaiocb->buf = acb->bounce;
+    gaiocb->offset = sector_num * BDRV_SECTOR_SIZE;
+    gaiocb->size = nb_sectors * BDRV_SECTOR_SIZE;
+    gaiocb->completion_fn = &gluster_finish_aiocb;
+
+    if (write) {
+        ret = gluster_aio_writev(s->fd, gaiocb);
+    } else {
+        ret = gluster_aio_readv(s->fd, gaiocb);
+    }
+
+    if (ret < 0) {
+        goto out;
+    }
+    return &acb->common;
+
+out:
+    g_free(gaiocb);
+    s->qemu_aio_count--;
+    /* The completion paths free the bounce buffer; do the same here */
+    qemu_vfree(acb->bounce);
+    qemu_aio_release(acb);
+    return NULL;
+}
+
+/* Asynchronous read entry point: qemu_gluster_aio_rw() with write == 0. */
+static BlockDriverAIOCB *qemu_gluster_aio_readv(BlockDriverState *bs,
+    int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+    BlockDriverCompletionFunc *cb, void *opaque)
+{
+    const int is_write = 0;
+
+    return qemu_gluster_aio_rw(bs, sector_num, qiov, nb_sectors, cb, opaque,
+                               is_write);
+}
+
+/* Asynchronous write entry point: qemu_gluster_aio_rw() with write == 1. */
+static BlockDriverAIOCB *qemu_gluster_aio_writev(BlockDriverState *bs,
+    int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+    BlockDriverCompletionFunc *cb, void *opaque)
+{
+    const int is_write = 1;
+
+    return qemu_gluster_aio_rw(bs, sector_num, qiov, nb_sectors, cb, opaque,
+                               is_write);
+}
+
+/*
+ * Report the size of the opened image.
+ *
+ * Returns st_size as reported by gluster_fstat(), or -1 if the stat fails.
+ */
+static int64_t qemu_gluster_getlength(BlockDriverState *bs)
+{
+    BDRVGlusterState *s = bs->opaque;
+    struct stat info;
+
+    if (gluster_fstat(s->fd, &info) < 0) {
+        return -1;
+    }
+    return info.st_size;
+}
+
+/*
+ * Close the image and tear down what qemu_gluster_open() set up.
+ *
+ * Besides closing the gluster file handle this unregisters the completion
+ * pipe's read handler and closes both pipe descriptors; previously they were
+ * left open, leaking two descriptors and a handler per open/close cycle.
+ * NOTE(review): assumes this is only called after a successful
+ * qemu_gluster_open(), i.e. that s->fds holds a valid pipe - confirm.
+ */
+static void qemu_gluster_close(BlockDriverState *bs)
+{
+    BDRVGlusterState *s = bs->opaque;
+
+    if (s->fd) {
+        gluster_close(s->fd);
+        s->fd = NULL;
+    }
+
+    /* Stop watching the pipe before its descriptors go away */
+    qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL, NULL);
+    close(s->fds[GLUSTER_FD_READ]);
+    close(s->fds[GLUSTER_FD_WRITE]);
+}
+
+static QEMUOptionParameter qemu_gluster_create_options[] = { /* options understood by qemu_gluster_create() */
+ {
+ .name = BLOCK_OPT_SIZE, /* the only option qemu_gluster_create() looks at */
+ .type = OPT_SIZE,
+ .help = "Virtual disk size"
+ },
+ { NULL } /* terminator: the option loop stops at the first entry with a NULL name */
+};
+
+static BlockDriver bdrv_gluster = { /* driver description handed to bdrv_register() */
+ .format_name = "gluster",
+ .protocol_name = "gluster", /* NOTE(review): presumably matched against the "protocol" prefix of the filename - confirm */
+ .instance_size = sizeof(BDRVGlusterState), /* size of the per-device state found in bs->opaque */
+ .bdrv_file_open = qemu_gluster_open,
+ .bdrv_close = qemu_gluster_close,
+ .bdrv_create = qemu_gluster_create,
+ .bdrv_getlength = qemu_gluster_getlength,
+
+ .bdrv_aio_readv = qemu_gluster_aio_readv, /* both AIO entry points funnel into qemu_gluster_aio_rw() */
+ .bdrv_aio_writev = qemu_gluster_aio_writev,
+
+ .create_options = qemu_gluster_create_options,
+};
+
+static void bdrv_gluster_init(void) /* registers the gluster driver with the block layer */
+{
+ bdrv_register(&bdrv_gluster);
+}
+
+block_init(bdrv_gluster_init); /* NOTE(review): presumably arranges for the init function to run at startup - confirm */
next prev parent reply other threads:[~2012-06-11 14:21 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-06-11 14:18 [Qemu-devel] [RFC PATCH 0/3] GlusterFS support in QEMU Bharata B Rao
2012-06-11 14:19 ` [Qemu-devel] [RFC PATCH 1/3] qemu: Add a config option for GlusterFS as block backend Bharata B Rao
2012-06-11 14:20 ` [Qemu-devel] [RFC PATCH 2/3] block: GlusterFS helpers to interface with libglusterfs Bharata B Rao
2012-06-18 17:35 ` Stefan Hajnoczi
2012-06-19 9:31 ` Bharata B Rao
2012-07-02 9:52 ` Paolo Bonzini
2012-07-02 10:05 ` Bharata B Rao
2012-06-11 14:21 ` Bharata B Rao [this message]
2012-06-18 17:35 ` [Qemu-devel] [RFC PATCH 3/3] block: gluster as block backend Stefan Hajnoczi
2012-06-19 9:27 ` Avi Kivity
2012-06-19 9:30 ` Bharata B Rao
2012-06-19 11:05 ` Stefan Hajnoczi
2012-07-01 14:50 ` Paolo Bonzini
2012-07-01 14:49 ` Paolo Bonzini
2012-06-18 15:36 ` [Qemu-devel] [RFC PATCH 0/3] GlusterFS support in QEMU Stefan Hajnoczi
2012-06-19 9:10 ` Bharata B Rao
2012-07-06 5:35 ` Bharata B Rao
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20120611142144.GD2737@in.ibm.com \
--to=bharata@linux.vnet.ibm.com \
--cc=amarts@redhat.com \
--cc=qemu-devel@nongnu.org \
--cc=vbellur@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.