From: Avi Kivity <avi@redhat.com>
Date: Sun, 18 Jan 2009 21:53:17 +0200
Message-Id: <1232308399-21679-4-git-send-email-avi@redhat.com>
In-Reply-To: <1232308399-21679-1-git-send-email-avi@redhat.com>
References: <1232308399-21679-1-git-send-email-avi@redhat.com>
Subject: [Qemu-devel] [PATCH 3/5] Vectored block device API
Reply-To: qemu-devel@nongnu.org
List-Id: qemu-devel.nongnu.org
To: qemu-devel@nongnu.org, Anthony Liguori

Most devices that are capable of DMA are also capable of scatter-gather.
With the memory mapping API, this means that the device code needs to be
able to access discontiguous host memory regions.

For block devices, this translates to vectored I/O.  This patch implements
an asynchronous vectored interface for the qemu block devices.  At the
moment all I/O is bounced and submitted through the non-vectored API; in
the future we will convert block devices to natively support vectored I/O
wherever possible.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 block.c |   92 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 block.h |    7 +++++
 2 files changed, 99 insertions(+), 0 deletions(-)

diff --git a/block.c b/block.c
index 3250327..4b2e34b 100644
--- a/block.c
+++ b/block.c
@@ -1246,6 +1246,93 @@ char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
 /**************************************************************/
 /* async I/Os */
 
+typedef struct VectorTranslationState {
+    struct iovec *iov;
+    int niov;
+    uint8_t *bounce;
+    int is_write;
+    BlockDriverAIOCB *aiocb;
+    BlockDriverAIOCB *this_aiocb;
+} VectorTranslationState;
+
+static void flatten_iovec(VectorTranslationState *s)
+{
+    uint8_t *p = s->bounce;
+    int i;
+
+    for (i = 0; i < s->niov; ++i) {
+        memcpy(p, s->iov[i].iov_base, s->iov[i].iov_len);
+        p += s->iov[i].iov_len;
+    }
+}
+
+static void unflatten_iovec(VectorTranslationState *s)
+{
+    uint8_t *p = s->bounce;
+    int i;
+
+    for (i = 0; i < s->niov; ++i) {
+        memcpy(s->iov[i].iov_base, p, s->iov[i].iov_len);
+        p += s->iov[i].iov_len;
+    }
+}
+
+static void bdrv_aio_rw_vector_cb(void *opaque, int ret)
+{
+    VectorTranslationState *s = opaque;
+
+    if (!s->is_write) {
+        unflatten_iovec(s);
+    }
+    qemu_free(s->bounce);
+    s->this_aiocb->cb(s->this_aiocb->opaque, ret);
+    qemu_aio_release(s->this_aiocb);
+}
+
+static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
+                                            int64_t sector_num,
+                                            struct iovec *iov, int niov,
+                                            int nb_sectors,
+                                            BlockDriverCompletionFunc *cb,
+                                            void *opaque,
+                                            int is_write)
+
+{
+    VectorTranslationState *s = qemu_mallocz(sizeof(*s));
+    BlockDriverAIOCB *aiocb = qemu_aio_get(bs, cb, opaque);
+
+    s->this_aiocb = aiocb;
+    s->iov = iov;
+    s->niov = niov;
+    s->bounce = qemu_memalign(512, nb_sectors * 512);
+    s->is_write = is_write;
+    if (is_write) {
+        flatten_iovec(s);
+        s->aiocb = bdrv_aio_write(bs, sector_num, s->bounce, nb_sectors,
+                                  bdrv_aio_rw_vector_cb, s);
+    } else {
+        s->aiocb = bdrv_aio_read(bs, sector_num, s->bounce, nb_sectors,
+                                 bdrv_aio_rw_vector_cb, s);
+    }
+    return aiocb;
+}
+
+BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
+                                 struct iovec *iov, int niov, int nb_sectors,
+                                 BlockDriverCompletionFunc *cb, void *opaque)
+{
+    return bdrv_aio_rw_vector(bs, sector_num, iov, niov, nb_sectors,
+                              cb, opaque, 0);
+}
+
+BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
+                                  struct iovec *iov, int niov, int nb_sectors,
+                                  BlockDriverCompletionFunc *cb, void *opaque)
+{
+    return bdrv_aio_rw_vector(bs, sector_num, iov, niov, nb_sectors,
+                              cb, opaque, 1);
+}
+
 BlockDriverAIOCB *bdrv_aio_read(BlockDriverState *bs, int64_t sector_num,
                                 uint8_t *buf, int nb_sectors,
                                 BlockDriverCompletionFunc *cb, void *opaque)
@@ -1294,6 +1381,11 @@ void bdrv_aio_cancel(BlockDriverAIOCB *acb)
 {
     BlockDriver *drv = acb->bs->drv;
 
+    if (acb->cb == bdrv_aio_rw_vector_cb) {
+        VectorTranslationState *s = acb->opaque;
+        acb = s->aiocb;
+    }
+
     drv->bdrv_aio_cancel(acb);
 }
 
diff --git a/block.h b/block.h
index c3314a1..0391704 100644
--- a/block.h
+++ b/block.h
@@ -85,6 +85,13 @@ int bdrv_commit(BlockDriverState *bs);
 typedef struct BlockDriverAIOCB BlockDriverAIOCB;
 typedef void BlockDriverCompletionFunc(void *opaque, int ret);
 
+BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
+                                 struct iovec *iov, int niov, int nb_sectors,
+                                 BlockDriverCompletionFunc *cb, void *opaque);
+BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
+                                  struct iovec *iov, int niov, int nb_sectors,
+                                  BlockDriverCompletionFunc *cb, void *opaque);
+
 BlockDriverAIOCB *bdrv_aio_read(BlockDriverState *bs, int64_t sector_num,
                                 uint8_t *buf, int nb_sectors,
                                 BlockDriverCompletionFunc *cb, void *opaque);
-- 
1.6.0.6
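
For illustration, a minimal sketch of how a device model could drive the new
interface once it has mapped its scatter-gather list.  The segment buffers,
sizes and the my_read_done callback below are hypothetical and not part of
this patch; only bdrv_aio_readv() and the types it uses come from the change
above.

    /* Illustrative caller: read 8 sectors into two discontiguous host
     * regions the device has already mapped.  Today this bounces through
     * one linear buffer and is submitted via bdrv_aio_read() underneath. */
    #include <sys/uio.h>
    #include "block.h"

    static void my_read_done(void *opaque, int ret)
    {
        /* ret is non-negative on success, negative on error */
    }

    static void my_start_read(BlockDriverState *bs,
                              uint8_t *seg0, uint8_t *seg1)
    {
        struct iovec iov[2] = {
            { .iov_base = seg0, .iov_len = 5 * 512 },  /* first mapped region  */
            { .iov_base = seg1, .iov_len = 3 * 512 },  /* second mapped region */
        };
        /* 2 iovec entries covering 8 sectors starting at sector 0 */
        BlockDriverAIOCB *acb = bdrv_aio_readv(bs, 0, iov, 2, 8,
                                               my_read_done, NULL);
        if (!acb) {
            /* submission failed */
        }
    }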