* [Qemu-devel] [RFC PATCH v0 0/3] gluster: conversion to coroutines and supporting write_zeroes @ 2013-11-22 7:16 Bharata B Rao 2013-11-22 7:16 ` [Qemu-devel] [RFC PATCH v0 1/3] gluster: Convert aio routines into coroutines Bharata B Rao ` (2 more replies) 0 siblings, 3 replies; 9+ messages in thread From: Bharata B Rao @ 2013-11-22 7:16 UTC (permalink / raw) To: qemu-devel; +Cc: kwolf, stefanha, Bharata B Rao Hi, This series is about converting all the bdrv_aio* implementations in gluster driver to coroutine based implementations. Read, write, flush and discard routines are converted. This also adds support for .bdrv_co_write_zeroes() in gluster and provides a new preallocation option with qemu-img (-o preallocation=full) that can be used for raw images on GlusterFS backend to create fully allocated and zero-filled images. Bharata B Rao (3): gluster: Convert aio routines into coroutines gluster: Implement .bdrv_co_write_zeroes for gluster gluster: Add support for creating zero-filled image block/gluster.c | 298 ++++++++++++++++++++++++++++++++------------------------ configure | 8 ++ 2 files changed, 181 insertions(+), 125 deletions(-) -- 1.7.11.7 ^ permalink raw reply [flat|nested] 9+ messages in thread
* [Qemu-devel] [RFC PATCH v0 1/3] gluster: Convert aio routines into coroutines 2013-11-22 7:16 [Qemu-devel] [RFC PATCH v0 0/3] gluster: conversion to coroutines and supporting write_zeroes Bharata B Rao @ 2013-11-22 7:16 ` Bharata B Rao 2013-12-03 14:04 ` Stefan Hajnoczi 2013-11-22 7:16 ` [Qemu-devel] [RFC PATCH v0 2/3] gluster: Implement .bdrv_co_write_zeroes for gluster Bharata B Rao 2013-11-22 7:16 ` [Qemu-devel] [RFC PATCH v0 3/3] gluster: Add support for creating zero-filled image Bharata B Rao 2 siblings, 1 reply; 9+ messages in thread From: Bharata B Rao @ 2013-11-22 7:16 UTC (permalink / raw) To: qemu-devel; +Cc: kwolf, stefanha, Bharata B Rao Convert the read, write, flush and discard implementations from aio-based ones to coroutine based ones. Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com> --- block/gluster.c | 168 +++++++++++++++++++++----------------------------------- 1 file changed, 63 insertions(+), 105 deletions(-) diff --git a/block/gluster.c b/block/gluster.c index 877686a..9f85228 100644 --- a/block/gluster.c +++ b/block/gluster.c @@ -26,11 +26,11 @@ typedef struct GlusterAIOCB { int ret; bool *finished; QEMUBH *bh; + Coroutine *coroutine; } GlusterAIOCB; typedef struct BDRVGlusterState { struct glfs *glfs; - int fds[2]; struct glfs_fd *fd; int event_reader_pos; GlusterAIOCB *event_acb; @@ -231,46 +231,23 @@ out: return NULL; } -static void qemu_gluster_complete_aio(GlusterAIOCB *acb, BDRVGlusterState *s) +static void qemu_gluster_complete_aio(void *opaque) { - int ret; - bool *finished = acb->finished; - BlockDriverCompletionFunc *cb = acb->common.cb; - void *opaque = acb->common.opaque; - - if (!acb->ret || acb->ret == acb->size) { - ret = 0; /* Success */ - } else if (acb->ret < 0) { - ret = acb->ret; /* Read/Write failed */ - } else { - ret = -EIO; /* Partial read/write - fail it */ - } + GlusterAIOCB *acb = (GlusterAIOCB *)opaque; - qemu_aio_release(acb); - cb(opaque, ret); - if (finished) { - *finished = true; + if (acb->ret == 
acb->size) { + acb->ret = 0; + } else if (acb->ret > 0) { + acb->ret = -EIO; /* Partial read/write - fail it */ } -} -static void qemu_gluster_aio_event_reader(void *opaque) -{ - BDRVGlusterState *s = opaque; - ssize_t ret; - - do { - char *p = (char *)&s->event_acb; - - ret = read(s->fds[GLUSTER_FD_READ], p + s->event_reader_pos, - sizeof(s->event_acb) - s->event_reader_pos); - if (ret > 0) { - s->event_reader_pos += ret; - if (s->event_reader_pos == sizeof(s->event_acb)) { - s->event_reader_pos = 0; - qemu_gluster_complete_aio(s->event_acb, s); - } - } - } while (ret < 0 && errno == EINTR); + qemu_bh_delete(acb->bh); + acb->bh = NULL; + qemu_coroutine_enter(acb->coroutine, NULL); + if (acb->finished) { + *acb->finished = true; + } + qemu_aio_release(acb); } /* TODO Convert to fine grained options */ @@ -309,7 +286,6 @@ static int qemu_gluster_open(BlockDriverState *bs, QDict *options, filename = qemu_opt_get(opts, "filename"); - s->glfs = qemu_gluster_init(gconf, filename); if (!s->glfs) { ret = -errno; @@ -329,18 +305,8 @@ static int qemu_gluster_open(BlockDriverState *bs, QDict *options, s->fd = glfs_open(s->glfs, gconf->image, open_flags); if (!s->fd) { ret = -errno; - goto out; } - ret = qemu_pipe(s->fds); - if (ret < 0) { - ret = -errno; - goto out; - } - fcntl(s->fds[GLUSTER_FD_READ], F_SETFL, O_NONBLOCK); - qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], - qemu_gluster_aio_event_reader, NULL, s); - out: qemu_opts_del(opts); qemu_gluster_gconf_free(gconf); @@ -414,28 +380,20 @@ static const AIOCBInfo gluster_aiocb_info = { .cancel = qemu_gluster_aio_cancel, }; +/* + * AIO callback routine called from GlusterFS thread. 
+ */ static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg) { GlusterAIOCB *acb = (GlusterAIOCB *)arg; - BlockDriverState *bs = acb->common.bs; - BDRVGlusterState *s = bs->opaque; - int retval; acb->ret = ret; - retval = qemu_write_full(s->fds[GLUSTER_FD_WRITE], &acb, sizeof(acb)); - if (retval != sizeof(acb)) { - /* - * Gluster AIO callback thread failed to notify the waiting - * QEMU thread about IO completion. - */ - error_report("Gluster AIO completion failed: %s", strerror(errno)); - abort(); - } + acb->bh = qemu_bh_new(qemu_gluster_complete_aio, acb); + qemu_bh_schedule(acb->bh); } -static BlockDriverAIOCB *qemu_gluster_aio_rw(BlockDriverState *bs, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, - BlockDriverCompletionFunc *cb, void *opaque, int write) +static coroutine_fn int qemu_gluster_aio_rw(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int write) { int ret; GlusterAIOCB *acb; @@ -446,10 +404,11 @@ static BlockDriverAIOCB *qemu_gluster_aio_rw(BlockDriverState *bs, offset = sector_num * BDRV_SECTOR_SIZE; size = nb_sectors * BDRV_SECTOR_SIZE; - acb = qemu_aio_get(&gluster_aiocb_info, bs, cb, opaque); + acb = qemu_aio_get(&gluster_aiocb_info, bs, NULL, NULL); acb->size = size; acb->ret = 0; acb->finished = NULL; + acb->coroutine = qemu_coroutine_self(); if (write) { ret = glfs_pwritev_async(s->fd, qiov->iov, qiov->niov, offset, 0, @@ -462,11 +421,13 @@ static BlockDriverAIOCB *qemu_gluster_aio_rw(BlockDriverState *bs, if (ret < 0) { goto out; } - return &acb->common; + + qemu_coroutine_yield(); + return acb->ret; out: qemu_aio_release(acb); - return NULL; + return ret; } static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset) @@ -482,47 +443,46 @@ static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset) return 0; } -static BlockDriverAIOCB *qemu_gluster_aio_readv(BlockDriverState *bs, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, - 
BlockDriverCompletionFunc *cb, void *opaque) +static coroutine_fn int qemu_gluster_co_readv(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) { - return qemu_gluster_aio_rw(bs, sector_num, qiov, nb_sectors, cb, opaque, 0); + return qemu_gluster_aio_rw(bs, sector_num, nb_sectors, qiov, 0); } -static BlockDriverAIOCB *qemu_gluster_aio_writev(BlockDriverState *bs, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, - BlockDriverCompletionFunc *cb, void *opaque) +static coroutine_fn int qemu_gluster_co_writev(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) { - return qemu_gluster_aio_rw(bs, sector_num, qiov, nb_sectors, cb, opaque, 1); + return qemu_gluster_aio_rw(bs, sector_num, nb_sectors, qiov, 1); } -static BlockDriverAIOCB *qemu_gluster_aio_flush(BlockDriverState *bs, - BlockDriverCompletionFunc *cb, void *opaque) +static coroutine_fn int qemu_gluster_co_flush_to_disk(BlockDriverState *bs) { int ret; GlusterAIOCB *acb; BDRVGlusterState *s = bs->opaque; - acb = qemu_aio_get(&gluster_aiocb_info, bs, cb, opaque); + acb = qemu_aio_get(&gluster_aiocb_info, bs, NULL, NULL); acb->size = 0; acb->ret = 0; acb->finished = NULL; + acb->coroutine = qemu_coroutine_self(); ret = glfs_fsync_async(s->fd, &gluster_finish_aiocb, acb); if (ret < 0) { goto out; } - return &acb->common; + + qemu_coroutine_yield(); + return acb->ret; out: qemu_aio_release(acb); - return NULL; + return ret; } #ifdef CONFIG_GLUSTERFS_DISCARD -static BlockDriverAIOCB *qemu_gluster_aio_discard(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, BlockDriverCompletionFunc *cb, - void *opaque) +static coroutine_fn int qemu_gluster_co_discard(BlockDriverState *bs, + int64_t sector_num, int nb_sectors) { int ret; GlusterAIOCB *acb; @@ -533,20 +493,22 @@ static BlockDriverAIOCB *qemu_gluster_aio_discard(BlockDriverState *bs, offset = sector_num * BDRV_SECTOR_SIZE; size = nb_sectors * BDRV_SECTOR_SIZE; - acb = 
qemu_aio_get(&gluster_aiocb_info, bs, cb, opaque); + acb = qemu_aio_get(&gluster_aiocb_info, bs, NULL, NULL); acb->size = 0; acb->ret = 0; acb->finished = NULL; + acb->coroutine = qemu_coroutine_self(); ret = glfs_discard_async(s->fd, offset, size, &gluster_finish_aiocb, acb); if (ret < 0) { goto out; } - return &acb->common; + qemu_coroutine_yield(); + return acb->ret; out: qemu_aio_release(acb); - return NULL; + return ret; } #endif @@ -581,10 +543,6 @@ static void qemu_gluster_close(BlockDriverState *bs) { BDRVGlusterState *s = bs->opaque; - close(s->fds[GLUSTER_FD_READ]); - close(s->fds[GLUSTER_FD_WRITE]); - qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL); - if (s->fd) { glfs_close(s->fd); s->fd = NULL; @@ -618,12 +576,12 @@ static BlockDriver bdrv_gluster = { .bdrv_getlength = qemu_gluster_getlength, .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, .bdrv_truncate = qemu_gluster_truncate, - .bdrv_aio_readv = qemu_gluster_aio_readv, - .bdrv_aio_writev = qemu_gluster_aio_writev, - .bdrv_aio_flush = qemu_gluster_aio_flush, + .bdrv_co_readv = qemu_gluster_co_readv, + .bdrv_co_writev = qemu_gluster_co_writev, + .bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk, .bdrv_has_zero_init = qemu_gluster_has_zero_init, #ifdef CONFIG_GLUSTERFS_DISCARD - .bdrv_aio_discard = qemu_gluster_aio_discard, + .bdrv_co_discard = qemu_gluster_co_discard, #endif .create_options = qemu_gluster_create_options, }; @@ -639,12 +597,12 @@ static BlockDriver bdrv_gluster_tcp = { .bdrv_getlength = qemu_gluster_getlength, .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, .bdrv_truncate = qemu_gluster_truncate, - .bdrv_aio_readv = qemu_gluster_aio_readv, - .bdrv_aio_writev = qemu_gluster_aio_writev, - .bdrv_aio_flush = qemu_gluster_aio_flush, + .bdrv_co_readv = qemu_gluster_co_readv, + .bdrv_co_writev = qemu_gluster_co_writev, + .bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk, .bdrv_has_zero_init = qemu_gluster_has_zero_init, #ifdef 
CONFIG_GLUSTERFS_DISCARD - .bdrv_aio_discard = qemu_gluster_aio_discard, + .bdrv_co_discard = qemu_gluster_co_discard, #endif .create_options = qemu_gluster_create_options, }; @@ -660,12 +618,12 @@ static BlockDriver bdrv_gluster_unix = { .bdrv_getlength = qemu_gluster_getlength, .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, .bdrv_truncate = qemu_gluster_truncate, - .bdrv_aio_readv = qemu_gluster_aio_readv, - .bdrv_aio_writev = qemu_gluster_aio_writev, - .bdrv_aio_flush = qemu_gluster_aio_flush, + .bdrv_co_readv = qemu_gluster_co_readv, + .bdrv_co_writev = qemu_gluster_co_writev, + .bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk, .bdrv_has_zero_init = qemu_gluster_has_zero_init, #ifdef CONFIG_GLUSTERFS_DISCARD - .bdrv_aio_discard = qemu_gluster_aio_discard, + .bdrv_co_discard = qemu_gluster_co_discard, #endif .create_options = qemu_gluster_create_options, }; @@ -681,12 +639,12 @@ static BlockDriver bdrv_gluster_rdma = { .bdrv_getlength = qemu_gluster_getlength, .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, .bdrv_truncate = qemu_gluster_truncate, - .bdrv_aio_readv = qemu_gluster_aio_readv, - .bdrv_aio_writev = qemu_gluster_aio_writev, - .bdrv_aio_flush = qemu_gluster_aio_flush, + .bdrv_co_readv = qemu_gluster_co_readv, + .bdrv_co_writev = qemu_gluster_co_writev, + .bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk, .bdrv_has_zero_init = qemu_gluster_has_zero_init, #ifdef CONFIG_GLUSTERFS_DISCARD - .bdrv_aio_discard = qemu_gluster_aio_discard, + .bdrv_co_discard = qemu_gluster_co_discard, #endif .create_options = qemu_gluster_create_options, }; -- 1.7.11.7 ^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [Qemu-devel] [RFC PATCH v0 1/3] gluster: Convert aio routines into coroutines 2013-11-22 7:16 ` [Qemu-devel] [RFC PATCH v0 1/3] gluster: Convert aio routines into coroutines Bharata B Rao @ 2013-12-03 14:04 ` Stefan Hajnoczi 2013-12-05 10:42 ` Bharata B Rao 0 siblings, 1 reply; 9+ messages in thread From: Stefan Hajnoczi @ 2013-12-03 14:04 UTC (permalink / raw) To: Bharata B Rao; +Cc: kwolf, qemu-devel, stefanha On Fri, Nov 22, 2013 at 12:46:16PM +0530, Bharata B Rao wrote: > + qemu_bh_delete(acb->bh); > + acb->bh = NULL; > + qemu_coroutine_enter(acb->coroutine, NULL); > + if (acb->finished) { > + *acb->finished = true; > + } Now that aio interfaces are gone ->finished and cancellation can be removed. > + qemu_aio_release(acb); Please do this in the functions that called qemu_aio_get(). Coroutines may yield so it's a little risky to assume the coroutine has finished accessing acb. > -static BlockDriverAIOCB *qemu_gluster_aio_rw(BlockDriverState *bs, > - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, > - BlockDriverCompletionFunc *cb, void *opaque, int write) > +static coroutine_fn int qemu_gluster_aio_rw(BlockDriverState *bs, > + int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int write) Please rename this to qemu_gluster_co_rw() since it isn't aio anymore and doesn't return a BlockDriverAIOCB. ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [Qemu-devel] [RFC PATCH v0 1/3] gluster: Convert aio routines into coroutines 2013-12-03 14:04 ` Stefan Hajnoczi @ 2013-12-05 10:42 ` Bharata B Rao 0 siblings, 0 replies; 9+ messages in thread From: Bharata B Rao @ 2013-12-05 10:42 UTC (permalink / raw) To: Stefan Hajnoczi; +Cc: kwolf, qemu-devel, stefanha On Tue, Dec 03, 2013 at 03:04:01PM +0100, Stefan Hajnoczi wrote: > On Fri, Nov 22, 2013 at 12:46:16PM +0530, Bharata B Rao wrote: > > + qemu_bh_delete(acb->bh); > > + acb->bh = NULL; > > + qemu_coroutine_enter(acb->coroutine, NULL); > > + if (acb->finished) { > > + *acb->finished = true; > > + } > > Now that aio interfaces are gone ->finished and cancellation can be > removed. > > > + qemu_aio_release(acb); > > Please do this in the functions that called qemu_aio_get(). Coroutines > may yield so it's a little risky to assume the coroutine has finished > accessing acb. > > -static BlockDriverAIOCB *qemu_gluster_aio_rw(BlockDriverState *bs, > > - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, > > - BlockDriverCompletionFunc *cb, void *opaque, int write) > > +static coroutine_fn int qemu_gluster_aio_rw(BlockDriverState *bs, > > + int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int write) > > Please rename this to qemu_gluster_co_rw() since it isn't aio anymore > and doesn't return a BlockDriverAIOCB. Thanks will address these in v1. Regards, Bharata. ^ permalink raw reply [flat|nested] 9+ messages in thread
* [Qemu-devel] [RFC PATCH v0 2/3] gluster: Implement .bdrv_co_write_zeroes for gluster 2013-11-22 7:16 [Qemu-devel] [RFC PATCH v0 0/3] gluster: conversion to coroutines and supporting write_zeroes Bharata B Rao 2013-11-22 7:16 ` [Qemu-devel] [RFC PATCH v0 1/3] gluster: Convert aio routines into coroutines Bharata B Rao @ 2013-11-22 7:16 ` Bharata B Rao 2013-12-04 19:16 ` Jeff Cody 2013-11-22 7:16 ` [Qemu-devel] [RFC PATCH v0 3/3] gluster: Add support for creating zero-filled image Bharata B Rao 2 siblings, 1 reply; 9+ messages in thread From: Bharata B Rao @ 2013-11-22 7:16 UTC (permalink / raw) To: qemu-devel; +Cc: kwolf, stefanha, Bharata B Rao Support .bdrv_co_write_zeroes() from gluster driver by using GlusterFS API glfs_zerofill() that off-loads the writing of zeroes to GlusterFS server. Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com> --- block/gluster.c | 101 ++++++++++++++++++++++++++++++++++++++++---------------- configure | 8 +++++ 2 files changed, 81 insertions(+), 28 deletions(-) diff --git a/block/gluster.c b/block/gluster.c index 9f85228..15f5dfb 100644 --- a/block/gluster.c +++ b/block/gluster.c @@ -250,6 +250,34 @@ static void qemu_gluster_complete_aio(void *opaque) qemu_aio_release(acb); } +/* + * AIO callback routine called from GlusterFS thread. 
+ */ +static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg) +{ + GlusterAIOCB *acb = (GlusterAIOCB *)arg; + + acb->ret = ret; + acb->bh = qemu_bh_new(qemu_gluster_complete_aio, acb); + qemu_bh_schedule(acb->bh); +} + +static void qemu_gluster_aio_cancel(BlockDriverAIOCB *blockacb) +{ + GlusterAIOCB *acb = (GlusterAIOCB *)blockacb; + bool finished = false; + + acb->finished = &finished; + while (!finished) { + qemu_aio_wait(); + } +} + +static const AIOCBInfo gluster_aiocb_info = { + .aiocb_size = sizeof(GlusterAIOCB), + .cancel = qemu_gluster_aio_cancel, +}; + /* TODO Convert to fine grained options */ static QemuOptsList runtime_opts = { .name = "gluster", @@ -322,6 +350,39 @@ out: return ret; } +#ifdef CONFIG_GLUSTERFS_ZEROFILL +static int qemu_gluster_co_write_zeroes(BlockDriverState *bs, + int64_t sector_num, int nb_sectors) +{ + int ret; + GlusterAIOCB *acb; + BDRVGlusterState *s = bs->opaque; + off_t size; + off_t offset; + + offset = sector_num * BDRV_SECTOR_SIZE; + size = nb_sectors * BDRV_SECTOR_SIZE; + + acb = qemu_aio_get(&gluster_aiocb_info, bs, NULL, NULL); + acb->size = size; + acb->ret = 0; + acb->finished = NULL; + acb->coroutine = qemu_coroutine_self(); + + ret = glfs_zerofill_async(s->fd, offset, size, &gluster_finish_aiocb, acb); + if (ret < 0) { + goto out; + } + + qemu_coroutine_yield(); + return acb->ret; + +out: + qemu_aio_release(acb); + return ret; +} +#endif + static int qemu_gluster_create(const char *filename, QEMUOptionParameter *options, Error **errp) { @@ -364,34 +425,6 @@ out: return ret; } -static void qemu_gluster_aio_cancel(BlockDriverAIOCB *blockacb) -{ - GlusterAIOCB *acb = (GlusterAIOCB *)blockacb; - bool finished = false; - - acb->finished = &finished; - while (!finished) { - qemu_aio_wait(); - } -} - -static const AIOCBInfo gluster_aiocb_info = { - .aiocb_size = sizeof(GlusterAIOCB), - .cancel = qemu_gluster_aio_cancel, -}; - -/* - * AIO callback routine called from GlusterFS thread. 
- */ -static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg) -{ - GlusterAIOCB *acb = (GlusterAIOCB *)arg; - - acb->ret = ret; - acb->bh = qemu_bh_new(qemu_gluster_complete_aio, acb); - qemu_bh_schedule(acb->bh); -} - static coroutine_fn int qemu_gluster_aio_rw(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int write) { @@ -583,6 +616,9 @@ static BlockDriver bdrv_gluster = { #ifdef CONFIG_GLUSTERFS_DISCARD .bdrv_co_discard = qemu_gluster_co_discard, #endif +#ifdef CONFIG_GLUSTERFS_ZEROFILL + .bdrv_co_write_zeroes = qemu_gluster_co_write_zeroes, +#endif .create_options = qemu_gluster_create_options, }; @@ -604,6 +640,9 @@ static BlockDriver bdrv_gluster_tcp = { #ifdef CONFIG_GLUSTERFS_DISCARD .bdrv_co_discard = qemu_gluster_co_discard, #endif +#ifdef CONFIG_GLUSTERFS_ZEROFILL + .bdrv_co_write_zeroes = qemu_gluster_co_write_zeroes, +#endif .create_options = qemu_gluster_create_options, }; @@ -625,6 +664,9 @@ static BlockDriver bdrv_gluster_unix = { #ifdef CONFIG_GLUSTERFS_DISCARD .bdrv_co_discard = qemu_gluster_co_discard, #endif +#ifdef CONFIG_GLUSTERFS_ZEROFILL + .bdrv_co_write_zeroes = qemu_gluster_co_write_zeroes, +#endif .create_options = qemu_gluster_create_options, }; @@ -646,6 +688,9 @@ static BlockDriver bdrv_gluster_rdma = { #ifdef CONFIG_GLUSTERFS_DISCARD .bdrv_co_discard = qemu_gluster_co_discard, #endif +#ifdef CONFIG_GLUSTERFS_ZEROFILL + .bdrv_co_write_zeroes = qemu_gluster_co_write_zeroes, +#endif .create_options = qemu_gluster_create_options, }; diff --git a/configure b/configure index 508f6a5..3c267a4 100755 --- a/configure +++ b/configure @@ -255,6 +255,7 @@ coroutine_pool="" seccomp="" glusterfs="" glusterfs_discard="no" +glusterfs_zerofill="no" virtio_blk_data_plane="" gtk="" gtkabi="2.0" @@ -2670,6 +2671,9 @@ if test "$glusterfs" != "no" ; then if $pkg_config --atleast-version=5 glusterfs-api; then glusterfs_discard="yes" fi + if $pkg_config --atleast-version=6 glusterfs-api; then + 
glusterfs_zerofill="yes" + fi else if test "$glusterfs" = "yes" ; then feature_not_found "GlusterFS backend support" @@ -4171,6 +4175,10 @@ if test "$glusterfs_discard" = "yes" ; then echo "CONFIG_GLUSTERFS_DISCARD=y" >> $config_host_mak fi +if test "$glusterfs_zerofill" = "yes" ; then + echo "CONFIG_GLUSTERFS_ZEROFILL=y" >> $config_host_mak +fi + if test "$libssh2" = "yes" ; then echo "CONFIG_LIBSSH2=y" >> $config_host_mak fi -- 1.7.11.7 ^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [Qemu-devel] [RFC PATCH v0 2/3] gluster: Implement .bdrv_co_write_zeroes for gluster 2013-11-22 7:16 ` [Qemu-devel] [RFC PATCH v0 2/3] gluster: Implement .bdrv_co_write_zeroes for gluster Bharata B Rao @ 2013-12-04 19:16 ` Jeff Cody 2013-12-05 10:45 ` Bharata B Rao 0 siblings, 1 reply; 9+ messages in thread From: Jeff Cody @ 2013-12-04 19:16 UTC (permalink / raw) To: Bharata B Rao; +Cc: kwolf, qemu-devel, stefanha On Fri, Nov 22, 2013 at 12:46:17PM +0530, Bharata B Rao wrote: > Support .bdrv_co_write_zeroes() from gluster driver by using GlusterFS API > glfs_zerofill() that off-loads the writing of zeroes to GlusterFS server. > > Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com> > --- > block/gluster.c | 101 ++++++++++++++++++++++++++++++++++++++++---------------- > configure | 8 +++++ > 2 files changed, 81 insertions(+), 28 deletions(-) > > diff --git a/block/gluster.c b/block/gluster.c > index 9f85228..15f5dfb 100644 > --- a/block/gluster.c > +++ b/block/gluster.c > @@ -250,6 +250,34 @@ static void qemu_gluster_complete_aio(void *opaque) > qemu_aio_release(acb); > } > > +/* > + * AIO callback routine called from GlusterFS thread. 
> + */ > +static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg) > +{ > + GlusterAIOCB *acb = (GlusterAIOCB *)arg; > + > + acb->ret = ret; > + acb->bh = qemu_bh_new(qemu_gluster_complete_aio, acb); > + qemu_bh_schedule(acb->bh); > +} > + > +static void qemu_gluster_aio_cancel(BlockDriverAIOCB *blockacb) > +{ > + GlusterAIOCB *acb = (GlusterAIOCB *)blockacb; > + bool finished = false; > + > + acb->finished = &finished; > + while (!finished) { > + qemu_aio_wait(); > + } > +} > + > +static const AIOCBInfo gluster_aiocb_info = { > + .aiocb_size = sizeof(GlusterAIOCB), > + .cancel = qemu_gluster_aio_cancel, > +}; > + > /* TODO Convert to fine grained options */ > static QemuOptsList runtime_opts = { > .name = "gluster", > @@ -322,6 +350,39 @@ out: > return ret; > } > > +#ifdef CONFIG_GLUSTERFS_ZEROFILL > +static int qemu_gluster_co_write_zeroes(BlockDriverState *bs, > + int64_t sector_num, int nb_sectors) > +{ > + int ret; > + GlusterAIOCB *acb; > + BDRVGlusterState *s = bs->opaque; > + off_t size; > + off_t offset; > + > + offset = sector_num * BDRV_SECTOR_SIZE; > + size = nb_sectors * BDRV_SECTOR_SIZE; > + > + acb = qemu_aio_get(&gluster_aiocb_info, bs, NULL, NULL); > + acb->size = size; > + acb->ret = 0; > + acb->finished = NULL; > + acb->coroutine = qemu_coroutine_self(); > + > + ret = glfs_zerofill_async(s->fd, offset, size, &gluster_finish_aiocb, acb); > + if (ret < 0) { I believe glfs_zerofill_async returns -1 on failure, and sets errno. In that case, we should set ret = -errno here. 
> + goto out; > + } > + > + qemu_coroutine_yield(); > + return acb->ret; > + > +out: > + qemu_aio_release(acb); > + return ret; > +} > +#endif > + > static int qemu_gluster_create(const char *filename, > QEMUOptionParameter *options, Error **errp) > { > @@ -364,34 +425,6 @@ out: > return ret; > } > > -static void qemu_gluster_aio_cancel(BlockDriverAIOCB *blockacb) > -{ > - GlusterAIOCB *acb = (GlusterAIOCB *)blockacb; > - bool finished = false; > - > - acb->finished = &finished; > - while (!finished) { > - qemu_aio_wait(); > - } > -} > - > -static const AIOCBInfo gluster_aiocb_info = { > - .aiocb_size = sizeof(GlusterAIOCB), > - .cancel = qemu_gluster_aio_cancel, > -}; > - > -/* > - * AIO callback routine called from GlusterFS thread. > - */ > -static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg) > -{ > - GlusterAIOCB *acb = (GlusterAIOCB *)arg; > - > - acb->ret = ret; > - acb->bh = qemu_bh_new(qemu_gluster_complete_aio, acb); > - qemu_bh_schedule(acb->bh); > -} > - > static coroutine_fn int qemu_gluster_aio_rw(BlockDriverState *bs, > int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int write) > { > @@ -583,6 +616,9 @@ static BlockDriver bdrv_gluster = { > #ifdef CONFIG_GLUSTERFS_DISCARD > .bdrv_co_discard = qemu_gluster_co_discard, > #endif > +#ifdef CONFIG_GLUSTERFS_ZEROFILL > + .bdrv_co_write_zeroes = qemu_gluster_co_write_zeroes, > +#endif > .create_options = qemu_gluster_create_options, > }; > > @@ -604,6 +640,9 @@ static BlockDriver bdrv_gluster_tcp = { > #ifdef CONFIG_GLUSTERFS_DISCARD > .bdrv_co_discard = qemu_gluster_co_discard, > #endif > +#ifdef CONFIG_GLUSTERFS_ZEROFILL > + .bdrv_co_write_zeroes = qemu_gluster_co_write_zeroes, > +#endif > .create_options = qemu_gluster_create_options, > }; > > @@ -625,6 +664,9 @@ static BlockDriver bdrv_gluster_unix = { > #ifdef CONFIG_GLUSTERFS_DISCARD > .bdrv_co_discard = qemu_gluster_co_discard, > #endif > +#ifdef CONFIG_GLUSTERFS_ZEROFILL > + .bdrv_co_write_zeroes = 
qemu_gluster_co_write_zeroes, > +#endif > .create_options = qemu_gluster_create_options, > }; > > @@ -646,6 +688,9 @@ static BlockDriver bdrv_gluster_rdma = { > #ifdef CONFIG_GLUSTERFS_DISCARD > .bdrv_co_discard = qemu_gluster_co_discard, > #endif > +#ifdef CONFIG_GLUSTERFS_ZEROFILL > + .bdrv_co_write_zeroes = qemu_gluster_co_write_zeroes, > +#endif > .create_options = qemu_gluster_create_options, > }; > > diff --git a/configure b/configure > index 508f6a5..3c267a4 100755 > --- a/configure > +++ b/configure > @@ -255,6 +255,7 @@ coroutine_pool="" > seccomp="" > glusterfs="" > glusterfs_discard="no" > +glusterfs_zerofill="no" > virtio_blk_data_plane="" > gtk="" > gtkabi="2.0" > @@ -2670,6 +2671,9 @@ if test "$glusterfs" != "no" ; then > if $pkg_config --atleast-version=5 glusterfs-api; then > glusterfs_discard="yes" > fi > + if $pkg_config --atleast-version=6 glusterfs-api; then > + glusterfs_zerofill="yes" > + fi > else > if test "$glusterfs" = "yes" ; then > feature_not_found "GlusterFS backend support" > @@ -4171,6 +4175,10 @@ if test "$glusterfs_discard" = "yes" ; then > echo "CONFIG_GLUSTERFS_DISCARD=y" >> $config_host_mak > fi > > +if test "$glusterfs_zerofill" = "yes" ; then > + echo "CONFIG_GLUSTERFS_ZEROFILL=y" >> $config_host_mak > +fi > + > if test "$libssh2" = "yes" ; then > echo "CONFIG_LIBSSH2=y" >> $config_host_mak > fi > -- > 1.7.11.7 > > ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [Qemu-devel] [RFC PATCH v0 2/3] gluster: Implement .bdrv_co_write_zeroes for gluster 2013-12-04 19:16 ` Jeff Cody @ 2013-12-05 10:45 ` Bharata B Rao 0 siblings, 0 replies; 9+ messages in thread From: Bharata B Rao @ 2013-12-05 10:45 UTC (permalink / raw) To: Jeff Cody; +Cc: kwolf, qemu-devel, stefanha On Wed, Dec 04, 2013 at 02:16:28PM -0500, Jeff Cody wrote: > On Fri, Nov 22, 2013 at 12:46:17PM +0530, Bharata B Rao wrote: > > + > > + ret = glfs_zerofill_async(s->fd, offset, size, &gluster_finish_aiocb, acb); > > + if (ret < 0) { > > I believe glfs_zerofill_async returns -1 on failure, and sets errno. > In that case, we should set ret = -errno here. This needs to be done for other routines too. Will address this and the other comment you have given in 2/3 thread. Thanks. Regards, Bharata. ^ permalink raw reply [flat|nested] 9+ messages in thread
* [Qemu-devel] [RFC PATCH v0 3/3] gluster: Add support for creating zero-filled image 2013-11-22 7:16 [Qemu-devel] [RFC PATCH v0 0/3] gluster: conversion to coroutines and supporting write_zeroes Bharata B Rao 2013-11-22 7:16 ` [Qemu-devel] [RFC PATCH v0 1/3] gluster: Convert aio routines into coroutines Bharata B Rao 2013-11-22 7:16 ` [Qemu-devel] [RFC PATCH v0 2/3] gluster: Implement .bdrv_co_write_zeroes for gluster Bharata B Rao @ 2013-11-22 7:16 ` Bharata B Rao 2013-12-04 19:00 ` Jeff Cody 2 siblings, 1 reply; 9+ messages in thread From: Bharata B Rao @ 2013-11-22 7:16 UTC (permalink / raw) To: qemu-devel; +Cc: kwolf, stefanha, Bharata B Rao GlusterFS supports creation of zero-filled file on GlusterFS volume by means of an API called glfs_zerofill(). Use this API from QEMU to create an image that is filled with zeroes by using the preallocation option of qemu-img. qemu-img create gluster://server/volume/image -o preallocation=full 10G The allowed values for preallocation are 'full' and 'off'. By default preallocation is off and image is not zero-filled. glfs_zerofill() offloads the writing of zeroes to the server and if the storage supports SCSI WRITESAME, GlusterFS server can issue BLKZEROOUT ioctl to achieve the zeroing. 
Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com> --- block/gluster.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/block/gluster.c b/block/gluster.c index 15f5dfb..2368997 100644 --- a/block/gluster.c +++ b/block/gluster.c @@ -381,6 +381,29 @@ out: qemu_aio_release(acb); return ret; } + +static inline int gluster_supports_zerofill(void) +{ + return 1; +} + +static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset, + int64_t size) +{ + return glfs_zerofill(fd, offset, size); +} + +#else +static inline int gluster_supports_zerofill(void) +{ + return 0; +} + +static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset, + int64_t size) +{ + return 0; +} #endif static int qemu_gluster_create(const char *filename, @@ -389,6 +412,7 @@ static int qemu_gluster_create(const char *filename, struct glfs *glfs; struct glfs_fd *fd; int ret = 0; + int prealloc = 0; int64_t total_size = 0; GlusterConf *gconf = g_malloc0(sizeof(GlusterConf)); @@ -401,6 +425,18 @@ static int qemu_gluster_create(const char *filename, while (options && options->name) { if (!strcmp(options->name, BLOCK_OPT_SIZE)) { total_size = options->value.n / BDRV_SECTOR_SIZE; + } else if (!strcmp(options->name, BLOCK_OPT_PREALLOC)) { + if (!options->value.s || !strcmp(options->value.s, "off")) { + prealloc = 0; + } else if (!strcmp(options->value.s, "full") && + gluster_supports_zerofill()) { + prealloc = 1; + } else { + error_setg(errp, "Invalid preallocation mode: '%s'" + " or GlusterFS doesn't support zerofill API", + options->value.s); + return -EINVAL; + } } options++; } @@ -413,6 +449,10 @@ static int qemu_gluster_create(const char *filename, if (glfs_ftruncate(fd, total_size * BDRV_SECTOR_SIZE) != 0) { ret = -errno; } + if (prealloc && qemu_gluster_zerofill(fd, 0, + total_size * BDRV_SECTOR_SIZE)) { + ret = -errno; + } if (glfs_close(fd) != 0) { ret = -errno; } @@ -595,6 +635,11 @@ static QEMUOptionParameter 
qemu_gluster_create_options[] = { .type = OPT_SIZE, .help = "Virtual disk size" }, + { + .name = BLOCK_OPT_PREALLOC, + .type = OPT_STRING, + .help = "Preallocation mode (allowed values: off, full)" + }, { NULL } }; -- 1.7.11.7 ^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [Qemu-devel] [RFC PATCH v0 3/3] gluster: Add support for creating zero-filled image 2013-11-22 7:16 ` [Qemu-devel] [RFC PATCH v0 3/3] gluster: Add support for creating zero-filled image Bharata B Rao @ 2013-12-04 19:00 ` Jeff Cody 0 siblings, 0 replies; 9+ messages in thread From: Jeff Cody @ 2013-12-04 19:00 UTC (permalink / raw) To: Bharata B Rao; +Cc: kwolf, qemu-devel, stefanha On Fri, Nov 22, 2013 at 12:46:18PM +0530, Bharata B Rao wrote: > GlusterFS supports creation of zero-filled file on GlusterFS volume > by means of an API called glfs_zerofill(). Use this API from QEMU to > create an image that is filled with zeroes by using the preallocation > option of qemu-img. > > qemu-img create gluster://server/volume/image -o preallocation=full 10G > > The allowed values for preallocation are 'full' and 'off'. By default > preallocation is off and image is not zero-filled. > > glfs_zerofill() offloads the writing of zeroes to the server and if > the storage supports SCSI WRITESAME, GlusterFS server can issue > BLKZEROOUT ioctl to achieve the zeroing. 
> > Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com> > --- > block/gluster.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 45 insertions(+) > > diff --git a/block/gluster.c b/block/gluster.c > index 15f5dfb..2368997 100644 > --- a/block/gluster.c > +++ b/block/gluster.c > @@ -381,6 +381,29 @@ out: > qemu_aio_release(acb); > return ret; > } > + > +static inline int gluster_supports_zerofill(void) > +{ > + return 1; > +} > + > +static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset, > + int64_t size) > +{ > + return glfs_zerofill(fd, offset, size); > +} > + > +#else > +static inline int gluster_supports_zerofill(void) > +{ > + return 0; > +} > + > +static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset, > + int64_t size) > +{ > + return 0; > +} > #endif > > static int qemu_gluster_create(const char *filename, > @@ -389,6 +412,7 @@ static int qemu_gluster_create(const char *filename, > struct glfs *glfs; > struct glfs_fd *fd; > int ret = 0; > + int prealloc = 0; > int64_t total_size = 0; > GlusterConf *gconf = g_malloc0(sizeof(GlusterConf)); > > @@ -401,6 +425,18 @@ static int qemu_gluster_create(const char *filename, > while (options && options->name) { > if (!strcmp(options->name, BLOCK_OPT_SIZE)) { > total_size = options->value.n / BDRV_SECTOR_SIZE; > + } else if (!strcmp(options->name, BLOCK_OPT_PREALLOC)) { > + if (!options->value.s || !strcmp(options->value.s, "off")) { > + prealloc = 0; > + } else if (!strcmp(options->value.s, "full") && > + gluster_supports_zerofill()) { > + prealloc = 1; > + } else { > + error_setg(errp, "Invalid preallocation mode: '%s'" > + " or GlusterFS doesn't support zerofill API", > + options->value.s); > + return -EINVAL; This leaks gconf. While probably not a huge deal for a .bdrv_create() implementation, you should still set ret, and perform a 'goto out' here. 
> + } > } > options++; > } > @@ -413,6 +449,10 @@ static int qemu_gluster_create(const char *filename, > if (glfs_ftruncate(fd, total_size * BDRV_SECTOR_SIZE) != 0) { > ret = -errno; > } > + if (prealloc && qemu_gluster_zerofill(fd, 0, > + total_size * BDRV_SECTOR_SIZE)) { > + ret = -errno; > + } > if (glfs_close(fd) != 0) { > ret = -errno; > } > @@ -595,6 +635,11 @@ static QEMUOptionParameter qemu_gluster_create_options[] = { > .type = OPT_SIZE, > .help = "Virtual disk size" > }, > + { > + .name = BLOCK_OPT_PREALLOC, > + .type = OPT_STRING, > + .help = "Preallocation mode (allowed values: off, full)" > + }, > { NULL } > }; > > -- > 1.7.11.7 > > ^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2013-12-05 12:05 UTC | newest] Thread overview: 9+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2013-11-22 7:16 [Qemu-devel] [RFC PATCH v0 0/3] gluster: conversion to coroutines and supporting write_zeroes Bharata B Rao 2013-11-22 7:16 ` [Qemu-devel] [RFC PATCH v0 1/3] gluster: Convert aio routines into coroutines Bharata B Rao 2013-12-03 14:04 ` Stefan Hajnoczi 2013-12-05 10:42 ` Bharata B Rao 2013-11-22 7:16 ` [Qemu-devel] [RFC PATCH v0 2/3] gluster: Implement .bdrv_co_write_zeroes for gluster Bharata B Rao 2013-12-04 19:16 ` Jeff Cody 2013-12-05 10:45 ` Bharata B Rao 2013-11-22 7:16 ` [Qemu-devel] [RFC PATCH v0 3/3] gluster: Add support for creating zero-filled image Bharata B Rao 2013-12-04 19:00 ` Jeff Cody
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).