* [Qemu-devel] [PATCH] RBD: Add support readv,writev for rbd
@ 2016-11-05 5:17 jazeltq
2016-11-05 17:06 ` Jason Dillaman
0 siblings, 1 reply; 2+ messages in thread
From: jazeltq @ 2016-11-05 5:17 UTC (permalink / raw)
To: jdurgin, jcody, jdillama, kwolf, mreitz, qemu-block
Cc: qemu-devel, ceph-devel, tianqing
From: tianqing <tianqing@unitedstack.com>
Rbd can do readv and writev directly, so wo do not need to transform
iov to buf or vice versa any more.
Signed-off-by: tianqing <tianqing@unitedstack.com>
---
block/rbd.c | 124 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 124 insertions(+)
diff --git a/block/rbd.c b/block/rbd.c
index a57b3e3..a405c02 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -53,6 +53,13 @@
#undef LIBRBD_SUPPORTS_DISCARD
#endif
+/* rbd_aio_readv, rbd_aio_writev added in 0.1.11 */
+#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 11)
+#define LIBRBD_SUPPORTS_IOV
+#else
+#undef LIBRBD_SUPPORTS_IOV
+#endif
+
#define OBJ_MAX_SIZE (1UL << OBJ_DEFAULT_OBJ_ORDER)
#define RBD_MAX_CONF_NAME_SIZE 128
@@ -73,7 +80,10 @@ typedef struct RBDAIOCB {
BlockAIOCB common;
int64_t ret;
QEMUIOVector *qiov;
+#ifdef LIBRBD_SUPPORTS_IOV
+#else
char *bounce;
+#endif
RBDAIOCmd cmd;
int error;
struct BDRVRBDState *s;
@@ -83,7 +93,10 @@ typedef struct RADOSCB {
RBDAIOCB *acb;
struct BDRVRBDState *s;
int64_t size;
+#ifdef LIBRBD_SUPPORTS_IOV
+#else
char *buf;
+#endif
int64_t ret;
} RADOSCB;
@@ -406,6 +419,48 @@ shutdown:
return ret;
}
+
+#ifdef LIBRBD_SUPPORTS_IOV
+/*
+ * This aio completion is being called from rbd_finish_bh() and runs in qemu
+ * BH context.
+ */
+static void qemu_rbd_complete_aio(RADOSCB *rcb)
+{
+ RBDAIOCB *acb = rcb->acb;
+ int64_t r;
+
+ r = rcb->ret;
+
+ if (acb->cmd != RBD_AIO_READ) {
+ if (r < 0) {
+ acb->ret = r;
+ acb->error = 1;
+ } else if (!acb->error) {
+ acb->ret = rcb->size;
+ }
+ } else {
+ if (r < 0) {
+ iov_memset(acb->qiov->iov, acb->qiov->niov, 0, 0, acb->qiov->size);
+ acb->ret = r;
+ acb->error = 1;
+ } else if (r < rcb->size) {
+ iov_memset(acb->qiov->iov, acb->qiov->niov,
+ rcb->size - r, 0, acb->qiov->size);
+ if (!acb->error) {
+ acb->ret = rcb->size;
+ }
+ } else if (!acb->error) {
+ acb->ret = r;
+ }
+ }
+
+ g_free(rcb);
+ acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
+
+ qemu_aio_unref(acb);
+}
+#else
/*
* This aio completion is being called from rbd_finish_bh() and runs in qemu
* BH context.
@@ -449,6 +504,7 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
qemu_aio_unref(acb);
}
+#endif
/* TODO Convert to fine grained options */
static QemuOptsList runtime_opts = {
@@ -644,6 +700,73 @@ static int rbd_aio_flush_wrapper(rbd_image_t image,
#endif
}
+#ifdef LIBRBD_SUPPORTS_IOV
+static BlockAIOCB *rbd_start_aio_vec(BlockDriverState *bs,
+ int64_t off,
+ QEMUIOVector *qiov,
+ int64_t size,
+ BlockCompletionFunc *cb,
+ void *opaque,
+ RBDAIOCmd cmd)
+{
+ RBDAIOCB *acb;
+ RADOSCB *rcb = NULL;
+ rbd_completion_t c;
+ int r;
+
+ BDRVRBDState *s = bs->opaque;
+
+ acb = qemu_aio_get(&rbd_aiocb_info, bs, cb, opaque);
+ acb->cmd = cmd;
+ acb->qiov = qiov;
+ assert(!qiov || qiov->size == size);
+
+ acb->ret = 0;
+ acb->error = 0;
+ acb->s = s;
+
+ rcb = g_new(RADOSCB, 1);
+ rcb->acb = acb;
+ rcb->s = acb->s;
+ rcb->size = size;
+ r = rbd_aio_create_completion(rcb, (rbd_callback_t) rbd_finish_aiocb, &c);
+ if (r < 0) {
+ goto failed;
+ }
+
+ switch (cmd) {
+ case RBD_AIO_WRITE:
+ r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c);
+ break;
+ case RBD_AIO_READ:
+ r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c);
+ break;
+ case RBD_AIO_DISCARD:
+ r = rbd_aio_discard_wrapper(s->image, off, size, c);
+ break;
+ case RBD_AIO_FLUSH:
+ r = rbd_aio_flush_wrapper(s->image, c);
+ break;
+ default:
+ r = -EINVAL;
+ }
+
+ if (r < 0) {
+ goto failed_completion;
+ }
+
+ return &acb->common;
+
+failed_completion:
+ rbd_aio_release(c);
+failed:
+ g_free(rcb);
+ qemu_aio_unref(acb);
+ return NULL;
+
+}
+
+#else
static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
int64_t off,
QEMUIOVector *qiov,
@@ -723,6 +846,7 @@ failed:
qemu_aio_unref(acb);
return NULL;
}
+#endif
static BlockAIOCB *qemu_rbd_aio_readv(BlockDriverState *bs,
int64_t sector_num,
--
2.10.2
^ permalink raw reply related [flat|nested] 2+ messages in thread
* Re: [Qemu-devel] [PATCH] RBD: Add support readv,writev for rbd
2016-11-05 5:17 [Qemu-devel] [PATCH] RBD: Add support readv,writev for rbd jazeltq
@ 2016-11-05 17:06 ` Jason Dillaman
0 siblings, 0 replies; 2+ messages in thread
From: Jason Dillaman @ 2016-11-05 17:06 UTC (permalink / raw)
To: Jaze Lee
Cc: Josh Durgin, Jeff Cody, Kevin Wolf, Max Reitz, qemu-block,
qemu-devel, ceph-devel, tianqing
On Sat, Nov 5, 2016 at 1:17 AM, <jazeltq@gmail.com> wrote:
> From: tianqing <tianqing@unitedstack.com>
>
> Rbd can do readv and writev directly, so wo do not need to transform
> iov to buf or vice versa any more.
>
> Signed-off-by: tianqing <tianqing@unitedstack.com>
> ---
> block/rbd.c | 124 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 124 insertions(+)
>
> diff --git a/block/rbd.c b/block/rbd.c
> index a57b3e3..a405c02 100644
> --- a/block/rbd.c
> +++ b/block/rbd.c
> @@ -53,6 +53,13 @@
> #undef LIBRBD_SUPPORTS_DISCARD
> #endif
>
> +/* rbd_aio_readv, rbd_aio_writev added in 0.1.11 */
> +#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 11)
> +#define LIBRBD_SUPPORTS_IOV
> +#else
> +#undef LIBRBD_SUPPORTS_IOV
> +#endif
> +
The LIBRBD_SUPPORTS_IOVEC define is already defined by librbd.h if
it's supported.
> #define OBJ_MAX_SIZE (1UL << OBJ_DEFAULT_OBJ_ORDER)
>
> #define RBD_MAX_CONF_NAME_SIZE 128
> @@ -73,7 +80,10 @@ typedef struct RBDAIOCB {
> BlockAIOCB common;
> int64_t ret;
> QEMUIOVector *qiov;
> +#ifdef LIBRBD_SUPPORTS_IOV
> +#else
> char *bounce;
> +#endif
Could use "#ifndef" to avoid the empty section.
> RBDAIOCmd cmd;
> int error;
> struct BDRVRBDState *s;
> @@ -83,7 +93,10 @@ typedef struct RADOSCB {
> RBDAIOCB *acb;
> struct BDRVRBDState *s;
> int64_t size;
> +#ifdef LIBRBD_SUPPORTS_IOV
> +#else
> char *buf;
> +#endif
Same comment here re: just using #ifndef
> int64_t ret;
> } RADOSCB;
>
> @@ -406,6 +419,48 @@ shutdown:
> return ret;
> }
>
> +
> +#ifdef LIBRBD_SUPPORTS_IOV
> +/*
> + * This aio completion is being called from rbd_finish_bh() and runs in qemu
> + * BH context.
> + */
> +static void qemu_rbd_complete_aio(RADOSCB *rcb)
> +{
> + RBDAIOCB *acb = rcb->acb;
> + int64_t r;
> +
> + r = rcb->ret;
> +
> + if (acb->cmd != RBD_AIO_READ) {
> + if (r < 0) {
> + acb->ret = r;
> + acb->error = 1;
> + } else if (!acb->error) {
> + acb->ret = rcb->size;
> + }
> + } else {
> + if (r < 0) {
> + iov_memset(acb->qiov->iov, acb->qiov->niov, 0, 0, acb->qiov->size);
> + acb->ret = r;
> + acb->error = 1;
> + } else if (r < rcb->size) {
> + iov_memset(acb->qiov->iov, acb->qiov->niov,
> + rcb->size - r, 0, acb->qiov->size);
> + if (!acb->error) {
> + acb->ret = rcb->size;
> + }
> + } else if (!acb->error) {
> + acb->ret = r;
> + }
> + }
> +
> + g_free(rcb);
> + acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
> +
> + qemu_aio_unref(acb);
> +}
Instead of duplicating the existing qemu_rbd_complete_aio and changing
a couple lines, perhaps just #ifdef/#ifndef around the few lines that
need changes (e.g. the bounce buffer vs iov buffer).
> +#else
> /*
> * This aio completion is being called from rbd_finish_bh() and runs in qemu
> * BH context.
> @@ -449,6 +504,7 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
>
> qemu_aio_unref(acb);
> }
> +#endif
>
> /* TODO Convert to fine grained options */
> static QemuOptsList runtime_opts = {
> @@ -644,6 +700,73 @@ static int rbd_aio_flush_wrapper(rbd_image_t image,
> #endif
> }
>
> +#ifdef LIBRBD_SUPPORTS_IOV
> +static BlockAIOCB *rbd_start_aio_vec(BlockDriverState *bs,
> + int64_t off,
> + QEMUIOVector *qiov,
> + int64_t size,
> + BlockCompletionFunc *cb,
> + void *opaque,
> + RBDAIOCmd cmd)
> +{
Is this method being invoked? I would also recommend just
intermingling the bounce buffer vs iov differences into the existing
rbd_start_aio method instead of duplicating a lot of code.
> + RBDAIOCB *acb;
> + RADOSCB *rcb = NULL;
> + rbd_completion_t c;
> + int r;
> +
> + BDRVRBDState *s = bs->opaque;
> +
> + acb = qemu_aio_get(&rbd_aiocb_info, bs, cb, opaque);
> + acb->cmd = cmd;
> + acb->qiov = qiov;
> + assert(!qiov || qiov->size == size);
> +
> + acb->ret = 0;
> + acb->error = 0;
> + acb->s = s;
> +
> + rcb = g_new(RADOSCB, 1);
> + rcb->acb = acb;
> + rcb->s = acb->s;
> + rcb->size = size;
> + r = rbd_aio_create_completion(rcb, (rbd_callback_t) rbd_finish_aiocb, &c);
> + if (r < 0) {
> + goto failed;
> + }
> +
> + switch (cmd) {
> + case RBD_AIO_WRITE:
> + r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c);
> + break;
> + case RBD_AIO_READ:
> + r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c);
> + break;
> + case RBD_AIO_DISCARD:
> + r = rbd_aio_discard_wrapper(s->image, off, size, c);
> + break;
> + case RBD_AIO_FLUSH:
> + r = rbd_aio_flush_wrapper(s->image, c);
> + break;
> + default:
> + r = -EINVAL;
> + }
> +
> + if (r < 0) {
> + goto failed_completion;
> + }
> +
> + return &acb->common;
> +
> +failed_completion:
> + rbd_aio_release(c);
> +failed:
> + g_free(rcb);
> + qemu_aio_unref(acb);
> + return NULL;
> +
> +}
> +
> +#else
> static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
> int64_t off,
> QEMUIOVector *qiov,
> @@ -723,6 +846,7 @@ failed:
> qemu_aio_unref(acb);
> return NULL;
> }
> +#endif
>
> static BlockAIOCB *qemu_rbd_aio_readv(BlockDriverState *bs,
> int64_t sector_num,
> --
> 2.10.2
>
In addition to the comments above, I think this patch should wait
until the IOV API changes are merged into librbd so that these changes
can be tested.
--
Jason
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2016-11-05 17:06 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-11-05 5:17 [Qemu-devel] [PATCH] RBD: Add support readv,writev for rbd jazeltq
2016-11-05 17:06 ` Jason Dillaman
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).