qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [RFC PATCH v1 0/3] gluster: conversion to coroutines and supporting write_zeroes
@ 2013-12-05 11:01 Bharata B Rao
  2013-12-05 11:01 ` [Qemu-devel] [RFC PATCH v1 1/3] gluster: Convert aio routines into coroutines Bharata B Rao
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Bharata B Rao @ 2013-12-05 11:01 UTC (permalink / raw)
  To: qemu-devel; +Cc: kwolf, jcody, stefanha, Bharata B Rao

Hi,

This series converts all the bdrv_aio* implementations in the gluster
driver to coroutine-based implementations. The read, write, flush and
discard routines are converted.

This also adds support for .bdrv_co_write_zeroes() in gluster and provides
a new preallocation option with qemu-img (-o preallocation=full) that can
be used for raw images on a GlusterFS backend to create fully allocated and
zero-filled images.

Changes in v1
-------------
- Remove qemu_gluster_aio_cancel() and associated code.
- Call qemu_aio_release() from the function that allocates the aiocb.
- s/qemu_gluster_aio_rw/qemu_gluster_co_rw.
- Use errno appropriately in the read, write, flush, discard and zerofill
  routines of the gluster driver.
- Fix a memory leak in qemu_gluster_create().
- Proceed with glfs_zerofill() only if glfs_ftruncate() succeeds in
  qemu_gluster_create().

Bharata B Rao (3):
  gluster: Convert aio routines into coroutines
  gluster: Implement .bdrv_co_write_zeroes for gluster
  gluster: Add support for creating zero-filled image

 block/gluster.c | 297 +++++++++++++++++++++++++++++++-------------------------
 configure       |   8 ++
 2 files changed, 174 insertions(+), 131 deletions(-)

-- 
1.7.11.7

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [Qemu-devel] [RFC PATCH v1 1/3] gluster: Convert aio routines into coroutines
  2013-12-05 11:01 [Qemu-devel] [RFC PATCH v1 0/3] gluster: conversion to coroutines and supporting write_zeroes Bharata B Rao
@ 2013-12-05 11:01 ` Bharata B Rao
  2013-12-16 16:33   ` Stefan Hajnoczi
  2013-12-05 11:01 ` [Qemu-devel] [RFC PATCH v1 2/3] gluster: Implement .bdrv_co_write_zeroes for gluster Bharata B Rao
  2013-12-05 11:01 ` [Qemu-devel] [RFC PATCH v1 3/3] gluster: Add support for creating zero-filled image Bharata B Rao
  2 siblings, 1 reply; 6+ messages in thread
From: Bharata B Rao @ 2013-12-05 11:01 UTC (permalink / raw)
  To: qemu-devel; +Cc: kwolf, jcody, stefanha, Bharata B Rao

Convert the read, write, flush and discard implementations from aio-based
ones to coroutine-based ones.

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
---
 block/gluster.c | 184 +++++++++++++++++++-------------------------------------
 1 file changed, 63 insertions(+), 121 deletions(-)

diff --git a/block/gluster.c b/block/gluster.c
index 877686a..88ef48d 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -24,13 +24,12 @@ typedef struct GlusterAIOCB {
     BlockDriverAIOCB common;
     int64_t size;
     int ret;
-    bool *finished;
     QEMUBH *bh;
+    Coroutine *coroutine;
 } GlusterAIOCB;
 
 typedef struct BDRVGlusterState {
     struct glfs *glfs;
-    int fds[2];
     struct glfs_fd *fd;
     int event_reader_pos;
     GlusterAIOCB *event_acb;
@@ -231,46 +230,19 @@ out:
     return NULL;
 }
 
-static void qemu_gluster_complete_aio(GlusterAIOCB *acb, BDRVGlusterState *s)
+static void qemu_gluster_complete_aio(void *opaque)
 {
-    int ret;
-    bool *finished = acb->finished;
-    BlockDriverCompletionFunc *cb = acb->common.cb;
-    void *opaque = acb->common.opaque;
-
-    if (!acb->ret || acb->ret == acb->size) {
-        ret = 0; /* Success */
-    } else if (acb->ret < 0) {
-        ret = acb->ret; /* Read/Write failed */
-    } else {
-        ret = -EIO; /* Partial read/write - fail it */
-    }
+    GlusterAIOCB *acb = (GlusterAIOCB *)opaque;
 
-    qemu_aio_release(acb);
-    cb(opaque, ret);
-    if (finished) {
-        *finished = true;
+    if (acb->ret == acb->size) {
+        acb->ret = 0;
+    } else if (acb->ret > 0) {
+        acb->ret = -EIO; /* Partial read/write - fail it */
     }
-}
 
-static void qemu_gluster_aio_event_reader(void *opaque)
-{
-    BDRVGlusterState *s = opaque;
-    ssize_t ret;
-
-    do {
-        char *p = (char *)&s->event_acb;
-
-        ret = read(s->fds[GLUSTER_FD_READ], p + s->event_reader_pos,
-                   sizeof(s->event_acb) - s->event_reader_pos);
-        if (ret > 0) {
-            s->event_reader_pos += ret;
-            if (s->event_reader_pos == sizeof(s->event_acb)) {
-                s->event_reader_pos = 0;
-                qemu_gluster_complete_aio(s->event_acb, s);
-            }
-        }
-    } while (ret < 0 && errno == EINTR);
+    qemu_bh_delete(acb->bh);
+    acb->bh = NULL;
+    qemu_coroutine_enter(acb->coroutine, NULL);
 }
 
 /* TODO Convert to fine grained options */
@@ -309,7 +281,6 @@ static int qemu_gluster_open(BlockDriverState *bs,  QDict *options,
 
     filename = qemu_opt_get(opts, "filename");
 
-
     s->glfs = qemu_gluster_init(gconf, filename);
     if (!s->glfs) {
         ret = -errno;
@@ -329,17 +300,7 @@ static int qemu_gluster_open(BlockDriverState *bs,  QDict *options,
     s->fd = glfs_open(s->glfs, gconf->image, open_flags);
     if (!s->fd) {
         ret = -errno;
-        goto out;
-    }
-
-    ret = qemu_pipe(s->fds);
-    if (ret < 0) {
-        ret = -errno;
-        goto out;
     }
-    fcntl(s->fds[GLUSTER_FD_READ], F_SETFL, O_NONBLOCK);
-    qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ],
-        qemu_gluster_aio_event_reader, NULL, s);
 
 out:
     qemu_opts_del(opts);
@@ -398,44 +359,24 @@ out:
     return ret;
 }
 
-static void qemu_gluster_aio_cancel(BlockDriverAIOCB *blockacb)
-{
-    GlusterAIOCB *acb = (GlusterAIOCB *)blockacb;
-    bool finished = false;
-
-    acb->finished = &finished;
-    while (!finished) {
-        qemu_aio_wait();
-    }
-}
-
 static const AIOCBInfo gluster_aiocb_info = {
     .aiocb_size = sizeof(GlusterAIOCB),
-    .cancel = qemu_gluster_aio_cancel,
 };
 
+/*
+ * AIO callback routine called from GlusterFS thread.
+ */
 static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
 {
     GlusterAIOCB *acb = (GlusterAIOCB *)arg;
-    BlockDriverState *bs = acb->common.bs;
-    BDRVGlusterState *s = bs->opaque;
-    int retval;
 
     acb->ret = ret;
-    retval = qemu_write_full(s->fds[GLUSTER_FD_WRITE], &acb, sizeof(acb));
-    if (retval != sizeof(acb)) {
-        /*
-         * Gluster AIO callback thread failed to notify the waiting
-         * QEMU thread about IO completion.
-         */
-        error_report("Gluster AIO completion failed: %s", strerror(errno));
-        abort();
-    }
+    acb->bh = qemu_bh_new(qemu_gluster_complete_aio, acb);
+    qemu_bh_schedule(acb->bh);
 }
 
-static BlockDriverAIOCB *qemu_gluster_aio_rw(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque, int write)
+static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
+        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int write)
 {
     int ret;
     GlusterAIOCB *acb;
@@ -446,10 +387,10 @@ static BlockDriverAIOCB *qemu_gluster_aio_rw(BlockDriverState *bs,
     offset = sector_num * BDRV_SECTOR_SIZE;
     size = nb_sectors * BDRV_SECTOR_SIZE;
 
-    acb = qemu_aio_get(&gluster_aiocb_info, bs, cb, opaque);
+    acb = qemu_aio_get(&gluster_aiocb_info, bs, NULL, NULL);
     acb->size = size;
     acb->ret = 0;
-    acb->finished = NULL;
+    acb->coroutine = qemu_coroutine_self();
 
     if (write) {
         ret = glfs_pwritev_async(s->fd, qiov->iov, qiov->niov, offset, 0,
@@ -460,13 +401,16 @@ static BlockDriverAIOCB *qemu_gluster_aio_rw(BlockDriverState *bs,
     }
 
     if (ret < 0) {
+        ret = -errno;
         goto out;
     }
-    return &acb->common;
+
+    qemu_coroutine_yield();
+    ret = acb->ret;
 
 out:
     qemu_aio_release(acb);
-    return NULL;
+    return ret;
 }
 
 static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset)
@@ -482,47 +426,46 @@ static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset)
     return 0;
 }
 
-static BlockDriverAIOCB *qemu_gluster_aio_readv(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque)
+static coroutine_fn int qemu_gluster_co_readv(BlockDriverState *bs,
+        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
 {
-    return qemu_gluster_aio_rw(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
+    return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 0);
 }
 
-static BlockDriverAIOCB *qemu_gluster_aio_writev(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque)
+static coroutine_fn int qemu_gluster_co_writev(BlockDriverState *bs,
+        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
 {
-    return qemu_gluster_aio_rw(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
+    return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 1);
 }
 
-static BlockDriverAIOCB *qemu_gluster_aio_flush(BlockDriverState *bs,
-        BlockDriverCompletionFunc *cb, void *opaque)
+static coroutine_fn int qemu_gluster_co_flush_to_disk(BlockDriverState *bs)
 {
     int ret;
     GlusterAIOCB *acb;
     BDRVGlusterState *s = bs->opaque;
 
-    acb = qemu_aio_get(&gluster_aiocb_info, bs, cb, opaque);
+    acb = qemu_aio_get(&gluster_aiocb_info, bs, NULL, NULL);
     acb->size = 0;
     acb->ret = 0;
-    acb->finished = NULL;
+    acb->coroutine = qemu_coroutine_self();
 
     ret = glfs_fsync_async(s->fd, &gluster_finish_aiocb, acb);
     if (ret < 0) {
+        ret = -errno;
         goto out;
     }
-    return &acb->common;
+
+    qemu_coroutine_yield();
+    ret = acb->ret;
 
 out:
     qemu_aio_release(acb);
-    return NULL;
+    return ret;
 }
 
 #ifdef CONFIG_GLUSTERFS_DISCARD
-static BlockDriverAIOCB *qemu_gluster_aio_discard(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors, BlockDriverCompletionFunc *cb,
-        void *opaque)
+static coroutine_fn int qemu_gluster_co_discard(BlockDriverState *bs,
+        int64_t sector_num, int nb_sectors)
 {
     int ret;
     GlusterAIOCB *acb;
@@ -533,20 +476,23 @@ static BlockDriverAIOCB *qemu_gluster_aio_discard(BlockDriverState *bs,
     offset = sector_num * BDRV_SECTOR_SIZE;
     size = nb_sectors * BDRV_SECTOR_SIZE;
 
-    acb = qemu_aio_get(&gluster_aiocb_info, bs, cb, opaque);
+    acb = qemu_aio_get(&gluster_aiocb_info, bs, NULL, NULL);
     acb->size = 0;
     acb->ret = 0;
-    acb->finished = NULL;
+    acb->coroutine = qemu_coroutine_self();
 
     ret = glfs_discard_async(s->fd, offset, size, &gluster_finish_aiocb, acb);
     if (ret < 0) {
+        ret = -errno;
         goto out;
     }
-    return &acb->common;
+
+    qemu_coroutine_yield();
+    ret = acb->ret;
 
 out:
     qemu_aio_release(acb);
-    return NULL;
+    return ret;
 }
 #endif
 
@@ -581,10 +527,6 @@ static void qemu_gluster_close(BlockDriverState *bs)
 {
     BDRVGlusterState *s = bs->opaque;
 
-    close(s->fds[GLUSTER_FD_READ]);
-    close(s->fds[GLUSTER_FD_WRITE]);
-    qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL);
-
     if (s->fd) {
         glfs_close(s->fd);
         s->fd = NULL;
@@ -618,12 +560,12 @@ static BlockDriver bdrv_gluster = {
     .bdrv_getlength               = qemu_gluster_getlength,
     .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
     .bdrv_truncate                = qemu_gluster_truncate,
-    .bdrv_aio_readv               = qemu_gluster_aio_readv,
-    .bdrv_aio_writev              = qemu_gluster_aio_writev,
-    .bdrv_aio_flush               = qemu_gluster_aio_flush,
+    .bdrv_co_readv                = qemu_gluster_co_readv,
+    .bdrv_co_writev               = qemu_gluster_co_writev,
+    .bdrv_co_flush_to_disk        = qemu_gluster_co_flush_to_disk,
     .bdrv_has_zero_init           = qemu_gluster_has_zero_init,
 #ifdef CONFIG_GLUSTERFS_DISCARD
-    .bdrv_aio_discard             = qemu_gluster_aio_discard,
+    .bdrv_co_discard              = qemu_gluster_co_discard,
 #endif
     .create_options               = qemu_gluster_create_options,
 };
@@ -639,12 +581,12 @@ static BlockDriver bdrv_gluster_tcp = {
     .bdrv_getlength               = qemu_gluster_getlength,
     .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
     .bdrv_truncate                = qemu_gluster_truncate,
-    .bdrv_aio_readv               = qemu_gluster_aio_readv,
-    .bdrv_aio_writev              = qemu_gluster_aio_writev,
-    .bdrv_aio_flush               = qemu_gluster_aio_flush,
+    .bdrv_co_readv                = qemu_gluster_co_readv,
+    .bdrv_co_writev               = qemu_gluster_co_writev,
+    .bdrv_co_flush_to_disk        = qemu_gluster_co_flush_to_disk,
     .bdrv_has_zero_init           = qemu_gluster_has_zero_init,
 #ifdef CONFIG_GLUSTERFS_DISCARD
-    .bdrv_aio_discard             = qemu_gluster_aio_discard,
+    .bdrv_co_discard              = qemu_gluster_co_discard,
 #endif
     .create_options               = qemu_gluster_create_options,
 };
@@ -660,12 +602,12 @@ static BlockDriver bdrv_gluster_unix = {
     .bdrv_getlength               = qemu_gluster_getlength,
     .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
     .bdrv_truncate                = qemu_gluster_truncate,
-    .bdrv_aio_readv               = qemu_gluster_aio_readv,
-    .bdrv_aio_writev              = qemu_gluster_aio_writev,
-    .bdrv_aio_flush               = qemu_gluster_aio_flush,
+    .bdrv_co_readv                = qemu_gluster_co_readv,
+    .bdrv_co_writev               = qemu_gluster_co_writev,
+    .bdrv_co_flush_to_disk        = qemu_gluster_co_flush_to_disk,
     .bdrv_has_zero_init           = qemu_gluster_has_zero_init,
 #ifdef CONFIG_GLUSTERFS_DISCARD
-    .bdrv_aio_discard             = qemu_gluster_aio_discard,
+    .bdrv_co_discard              = qemu_gluster_co_discard,
 #endif
     .create_options               = qemu_gluster_create_options,
 };
@@ -681,12 +623,12 @@ static BlockDriver bdrv_gluster_rdma = {
     .bdrv_getlength               = qemu_gluster_getlength,
     .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
     .bdrv_truncate                = qemu_gluster_truncate,
-    .bdrv_aio_readv               = qemu_gluster_aio_readv,
-    .bdrv_aio_writev              = qemu_gluster_aio_writev,
-    .bdrv_aio_flush               = qemu_gluster_aio_flush,
+    .bdrv_co_readv                = qemu_gluster_co_readv,
+    .bdrv_co_writev               = qemu_gluster_co_writev,
+    .bdrv_co_flush_to_disk        = qemu_gluster_co_flush_to_disk,
     .bdrv_has_zero_init           = qemu_gluster_has_zero_init,
 #ifdef CONFIG_GLUSTERFS_DISCARD
-    .bdrv_aio_discard             = qemu_gluster_aio_discard,
+    .bdrv_co_discard              = qemu_gluster_co_discard,
 #endif
     .create_options               = qemu_gluster_create_options,
 };
-- 
1.7.11.7

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [Qemu-devel] [RFC PATCH v1 2/3] gluster: Implement .bdrv_co_write_zeroes for gluster
  2013-12-05 11:01 [Qemu-devel] [RFC PATCH v1 0/3] gluster: conversion to coroutines and supporting write_zeroes Bharata B Rao
  2013-12-05 11:01 ` [Qemu-devel] [RFC PATCH v1 1/3] gluster: Convert aio routines into coroutines Bharata B Rao
@ 2013-12-05 11:01 ` Bharata B Rao
  2013-12-05 11:01 ` [Qemu-devel] [RFC PATCH v1 3/3] gluster: Add support for creating zero-filled image Bharata B Rao
  2 siblings, 0 replies; 6+ messages in thread
From: Bharata B Rao @ 2013-12-05 11:01 UTC (permalink / raw)
  To: qemu-devel; +Cc: kwolf, jcody, stefanha, Bharata B Rao

Support .bdrv_co_write_zeroes() in the gluster driver by using the GlusterFS
zerofill API (glfs_zerofill_async()), which offloads the writing of zeroes to
the GlusterFS server.

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
---
 block/gluster.c | 77 +++++++++++++++++++++++++++++++++++++++++++++------------
 configure       |  8 ++++++
 2 files changed, 69 insertions(+), 16 deletions(-)

diff --git a/block/gluster.c b/block/gluster.c
index 88ef48d..1390270 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -245,6 +245,22 @@ static void qemu_gluster_complete_aio(void *opaque)
     qemu_coroutine_enter(acb->coroutine, NULL);
 }
 
+/*
+ * AIO callback routine called from GlusterFS thread.
+ */
+static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
+{
+    GlusterAIOCB *acb = (GlusterAIOCB *)arg;
+
+    acb->ret = ret;
+    acb->bh = qemu_bh_new(qemu_gluster_complete_aio, acb);
+    qemu_bh_schedule(acb->bh);
+}
+
+static const AIOCBInfo gluster_aiocb_info = {
+    .aiocb_size = sizeof(GlusterAIOCB),
+};
+
 /* TODO Convert to fine grained options */
 static QemuOptsList runtime_opts = {
     .name = "gluster",
@@ -317,6 +333,39 @@ out:
     return ret;
 }
 
+#ifdef CONFIG_GLUSTERFS_ZEROFILL
+static int qemu_gluster_co_write_zeroes(BlockDriverState *bs,
+        int64_t sector_num, int nb_sectors)
+{
+    int ret;
+    GlusterAIOCB *acb;
+    BDRVGlusterState *s = bs->opaque;
+    off_t size;
+    off_t offset;
+
+    offset = sector_num * BDRV_SECTOR_SIZE;
+    size = nb_sectors * BDRV_SECTOR_SIZE;
+
+    acb = qemu_aio_get(&gluster_aiocb_info, bs, NULL, NULL);
+    acb->size = size;
+    acb->ret = 0;
+    acb->coroutine = qemu_coroutine_self();
+
+    ret = glfs_zerofill_async(s->fd, offset, size, &gluster_finish_aiocb, acb);
+    if (ret < 0) {
+        ret = -errno;
+        goto out;
+    }
+
+    qemu_coroutine_yield();
+    ret = acb->ret;
+
+out:
+    qemu_aio_release(acb);
+    return ret;
+}
+#endif
+
 static int qemu_gluster_create(const char *filename,
         QEMUOptionParameter *options, Error **errp)
 {
@@ -359,22 +408,6 @@ out:
     return ret;
 }
 
-static const AIOCBInfo gluster_aiocb_info = {
-    .aiocb_size = sizeof(GlusterAIOCB),
-};
-
-/*
- * AIO callback routine called from GlusterFS thread.
- */
-static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
-{
-    GlusterAIOCB *acb = (GlusterAIOCB *)arg;
-
-    acb->ret = ret;
-    acb->bh = qemu_bh_new(qemu_gluster_complete_aio, acb);
-    qemu_bh_schedule(acb->bh);
-}
-
 static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
         int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int write)
 {
@@ -567,6 +600,9 @@ static BlockDriver bdrv_gluster = {
 #ifdef CONFIG_GLUSTERFS_DISCARD
     .bdrv_co_discard              = qemu_gluster_co_discard,
 #endif
+#ifdef CONFIG_GLUSTERFS_ZEROFILL
+    .bdrv_co_write_zeroes         = qemu_gluster_co_write_zeroes,
+#endif
     .create_options               = qemu_gluster_create_options,
 };
 
@@ -588,6 +624,9 @@ static BlockDriver bdrv_gluster_tcp = {
 #ifdef CONFIG_GLUSTERFS_DISCARD
     .bdrv_co_discard              = qemu_gluster_co_discard,
 #endif
+#ifdef CONFIG_GLUSTERFS_ZEROFILL
+    .bdrv_co_write_zeroes         = qemu_gluster_co_write_zeroes,
+#endif
     .create_options               = qemu_gluster_create_options,
 };
 
@@ -609,6 +648,9 @@ static BlockDriver bdrv_gluster_unix = {
 #ifdef CONFIG_GLUSTERFS_DISCARD
     .bdrv_co_discard              = qemu_gluster_co_discard,
 #endif
+#ifdef CONFIG_GLUSTERFS_ZEROFILL
+    .bdrv_co_write_zeroes         = qemu_gluster_co_write_zeroes,
+#endif
     .create_options               = qemu_gluster_create_options,
 };
 
@@ -630,6 +672,9 @@ static BlockDriver bdrv_gluster_rdma = {
 #ifdef CONFIG_GLUSTERFS_DISCARD
     .bdrv_co_discard              = qemu_gluster_co_discard,
 #endif
+#ifdef CONFIG_GLUSTERFS_ZEROFILL
+    .bdrv_co_write_zeroes         = qemu_gluster_co_write_zeroes,
+#endif
     .create_options               = qemu_gluster_create_options,
 };
 
diff --git a/configure b/configure
index 0666228..886d71b 100755
--- a/configure
+++ b/configure
@@ -255,6 +255,7 @@ coroutine_pool=""
 seccomp=""
 glusterfs=""
 glusterfs_discard="no"
+glusterfs_zerofill="no"
 virtio_blk_data_plane=""
 gtk=""
 gtkabi="2.0"
@@ -2673,6 +2674,9 @@ if test "$glusterfs" != "no" ; then
     if $pkg_config --atleast-version=5 glusterfs-api; then
       glusterfs_discard="yes"
     fi
+    if $pkg_config --atleast-version=6 glusterfs-api; then
+      glusterfs_zerofill="yes"
+    fi
   else
     if test "$glusterfs" = "yes" ; then
       feature_not_found "GlusterFS backend support"
@@ -4175,6 +4179,10 @@ if test "$glusterfs_discard" = "yes" ; then
   echo "CONFIG_GLUSTERFS_DISCARD=y" >> $config_host_mak
 fi
 
+if test "$glusterfs_zerofill" = "yes" ; then
+  echo "CONFIG_GLUSTERFS_ZEROFILL=y" >> $config_host_mak
+fi
+
 if test "$libssh2" = "yes" ; then
   echo "CONFIG_LIBSSH2=y" >> $config_host_mak
 fi
-- 
1.7.11.7

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [Qemu-devel] [RFC PATCH v1 3/3] gluster: Add support for creating zero-filled image
  2013-12-05 11:01 [Qemu-devel] [RFC PATCH v1 0/3] gluster: conversion to coroutines and supporting write_zeroes Bharata B Rao
  2013-12-05 11:01 ` [Qemu-devel] [RFC PATCH v1 1/3] gluster: Convert aio routines into coroutines Bharata B Rao
  2013-12-05 11:01 ` [Qemu-devel] [RFC PATCH v1 2/3] gluster: Implement .bdrv_co_write_zeroes for gluster Bharata B Rao
@ 2013-12-05 11:01 ` Bharata B Rao
  2013-12-16 16:39   ` Stefan Hajnoczi
  2 siblings, 1 reply; 6+ messages in thread
From: Bharata B Rao @ 2013-12-05 11:01 UTC (permalink / raw)
  To: qemu-devel; +Cc: kwolf, jcody, stefanha, Bharata B Rao

GlusterFS supports creating a zero-filled file on a GlusterFS volume
by means of an API called glfs_zerofill(). Use this API from QEMU to
create an image that is filled with zeroes by using the preallocation
option of qemu-img.

qemu-img create gluster://server/volume/image -o preallocation=full 10G

The allowed values for preallocation are 'full' and 'off'. By default,
preallocation is off and the image is not zero-filled.

glfs_zerofill() offloads the writing of zeroes to the server and, if
the storage supports SCSI WRITESAME, the GlusterFS server can issue the
BLKZEROOUT ioctl to achieve the zeroing.

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
---
 block/gluster.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 49 insertions(+), 1 deletion(-)

diff --git a/block/gluster.c b/block/gluster.c
index 1390270..c167abe 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -364,6 +364,29 @@ out:
     qemu_aio_release(acb);
     return ret;
 }
+
+static inline int gluster_supports_zerofill(void)
+{
+    return 1;
+}
+
+static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset,
+        int64_t size)
+{
+    return glfs_zerofill(fd, offset, size);
+}
+
+#else
+static inline int gluster_supports_zerofill(void)
+{
+    return 0;
+}
+
+static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset,
+        int64_t size)
+{
+    return 0;
+}
 #endif
 
 static int qemu_gluster_create(const char *filename,
@@ -372,6 +395,7 @@ static int qemu_gluster_create(const char *filename,
     struct glfs *glfs;
     struct glfs_fd *fd;
     int ret = 0;
+    int prealloc = 0;
     int64_t total_size = 0;
     GlusterConf *gconf = g_malloc0(sizeof(GlusterConf));
 
@@ -384,6 +408,19 @@ static int qemu_gluster_create(const char *filename,
     while (options && options->name) {
         if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
             total_size = options->value.n / BDRV_SECTOR_SIZE;
+        } else if (!strcmp(options->name, BLOCK_OPT_PREALLOC)) {
+            if (!options->value.s || !strcmp(options->value.s, "off")) {
+                prealloc = 0;
+            } else if (!strcmp(options->value.s, "full") &&
+                    gluster_supports_zerofill()) {
+                prealloc = 1;
+            } else {
+                error_setg(errp, "Invalid preallocation mode: '%s'"
+                    " or GlusterFS doesn't support zerofill API",
+                           options->value.s);
+                ret = -EINVAL;
+                goto out;
+            }
         }
         options++;
     }
@@ -393,9 +430,15 @@ static int qemu_gluster_create(const char *filename,
     if (!fd) {
         ret = -errno;
     } else {
-        if (glfs_ftruncate(fd, total_size * BDRV_SECTOR_SIZE) != 0) {
+        if (!glfs_ftruncate(fd, total_size * BDRV_SECTOR_SIZE)) {
+            if (prealloc && qemu_gluster_zerofill(fd, 0,
+                    total_size * BDRV_SECTOR_SIZE)) {
+                ret = -errno;
+            }
+        } else {
             ret = -errno;
         }
+
         if (glfs_close(fd) != 0) {
             ret = -errno;
         }
@@ -579,6 +622,11 @@ static QEMUOptionParameter qemu_gluster_create_options[] = {
         .type = OPT_SIZE,
         .help = "Virtual disk size"
     },
+    {
+        .name = BLOCK_OPT_PREALLOC,
+        .type = OPT_STRING,
+        .help = "Preallocation mode (allowed values: off, full)"
+    },
     { NULL }
 };
 
-- 
1.7.11.7

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [Qemu-devel] [RFC PATCH v1 1/3] gluster: Convert aio routines into coroutines
  2013-12-05 11:01 ` [Qemu-devel] [RFC PATCH v1 1/3] gluster: Convert aio routines into coroutines Bharata B Rao
@ 2013-12-16 16:33   ` Stefan Hajnoczi
  0 siblings, 0 replies; 6+ messages in thread
From: Stefan Hajnoczi @ 2013-12-16 16:33 UTC (permalink / raw)
  To: Bharata B Rao; +Cc: kwolf, jcody, qemu-devel, stefanha

On Thu, Dec 05, 2013 at 04:31:26PM +0530, Bharata B Rao wrote:
> -static void qemu_gluster_complete_aio(GlusterAIOCB *acb, BDRVGlusterState *s)
> +static void qemu_gluster_complete_aio(void *opaque)
>  {
> -    int ret;
> -    bool *finished = acb->finished;
> -    BlockDriverCompletionFunc *cb = acb->common.cb;
> -    void *opaque = acb->common.opaque;
> -
> -    if (!acb->ret || acb->ret == acb->size) {
> -        ret = 0; /* Success */
> -    } else if (acb->ret < 0) {
> -        ret = acb->ret; /* Read/Write failed */
> -    } else {
> -        ret = -EIO; /* Partial read/write - fail it */
> -    }
[...]
> +    if (acb->ret == acb->size) {
> +        acb->ret = 0;
> +    } else if (acb->ret > 0) {
> +        acb->ret = -EIO; /* Partial read/write - fail it */
>      }

This change is a little ugly since qemu_gluster_complete_aio() now
modifies acb->ret in-place.  I suggest moving the if statements down
into gluster_finish_aiocb() where we first receive the request's return
value.  Then qemu_gluster_complete_aio() simply enters the coroutine and
doesn't modify acb->ret.

>  static const AIOCBInfo gluster_aiocb_info = {
>      .aiocb_size = sizeof(GlusterAIOCB),
> -    .cancel = qemu_gluster_aio_cancel,
>  };

At this point using BlockDriverAIOCB and qemu_aio_get() becomes
questionable.  We no longer implement .cancel() because we don't need
the aio interface.

It would be cleaner to manage our own request struct and allocate using
g_slice_new()/g_slice_free().  That way we don't "reuse"
BlockDriverAIOCB without fully implementing the AIOCBInfo interface.

Stefan

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [Qemu-devel] [RFC PATCH v1 3/3] gluster: Add support for creating zero-filled image
  2013-12-05 11:01 ` [Qemu-devel] [RFC PATCH v1 3/3] gluster: Add support for creating zero-filled image Bharata B Rao
@ 2013-12-16 16:39   ` Stefan Hajnoczi
  0 siblings, 0 replies; 6+ messages in thread
From: Stefan Hajnoczi @ 2013-12-16 16:39 UTC (permalink / raw)
  To: Bharata B Rao; +Cc: kwolf, jcody, qemu-devel, stefanha

On Thu, Dec 05, 2013 at 04:31:28PM +0530, Bharata B Rao wrote:
> @@ -364,6 +364,29 @@ out:
>      qemu_aio_release(acb);
>      return ret;
>  }
> +
> +static inline int gluster_supports_zerofill(void)

Please use bool.

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2013-12-16 16:40 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-12-05 11:01 [Qemu-devel] [RFC PATCH v1 0/3] gluster: conversion to coroutines and supporting write_zeroes Bharata B Rao
2013-12-05 11:01 ` [Qemu-devel] [RFC PATCH v1 1/3] gluster: Convert aio routines into coroutines Bharata B Rao
2013-12-16 16:33   ` Stefan Hajnoczi
2013-12-05 11:01 ` [Qemu-devel] [RFC PATCH v1 2/3] gluster: Implement .bdrv_co_write_zeroes for gluster Bharata B Rao
2013-12-05 11:01 ` [Qemu-devel] [RFC PATCH v1 3/3] gluster: Add support for creating zero-filled image Bharata B Rao
2013-12-16 16:39   ` Stefan Hajnoczi

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).