From: Andrea Arcangeli
Date: Thu, 27 Nov 2008 13:43:51 +0100
To: qemu-devel@nongnu.org
Subject: [Qemu-devel] [RFC 2/2] bdrv_aio_readv/writev_em
Message-ID: <20081127124351.GD10348@random.random>
In-Reply-To: <20081127123538.GC10348@random.random>
References: <20081127123538.GC10348@random.random>

Hello,

this is the emulated bdrv_aio_readv/writev, a pure hack so that the dma api
in the previous patch can be tested. For the real thing there are two ways
to go:

1) pthread_create() and do aio with pthreads, calling writev by hand from
   the worker threads;

2) kernel-based linux aio. I think this is much better: it won't screw up
   contiguous I/O, and it handles o_direct random writes and random reads by
   keeping the lowlevel I/O pipeline full without threads, simply by queuing
   the commands of every direct-io aio_readv/writev asynchronously and
   _in_order_ (in order only from the point of view of the I/O scheduler, of
   course) into the lowlevel storage queue, with no scheduler or
   thread-synchronization involvement.

So who's going to add the real bdrv_aio_readv/writev instead of the
aberration below, which breaks on backends that don't support aio, breaks
with bdrv_aio_cancel too, and on top of that is horribly slow and makes the
direct path slower than the bounce path?
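To make option 2 a bit more concrete, below is a rough, untested sketch of a
kernel aio submission through libaio (io_setup/io_prep_preadv/io_submit/
io_getevents). It is illustration only, not part of the patch: the fd setup,
O_DIRECT alignment and the hookup into qemu's completion machinery are left
out, io_prep_preadv needs a recent enough kernel/libaio, and in qemu the
completion would be collected from the event loop instead of being waited
for synchronously as done here.

/* Sketch of option 2 (kernel aio via libaio); assumptions as noted above. */
#include <libaio.h>
#include <sys/uio.h>

static long kernel_aio_preadv(int fd, struct iovec *iov, int iovcnt,
                              long long offset)
{
    io_context_t ctx = 0;
    struct iocb cb, *cbs[1] = { &cb };
    struct io_event ev;
    long ret = -1;

    /* one slot is enough for this demo; qemu would keep many in flight */
    if (io_setup(1, &ctx) < 0)
        return -1;

    /* queue the whole vector as a single direct-io request; with O_DIRECT
     * the buffers and the offset must be properly aligned */
    io_prep_preadv(&cb, fd, iov, iovcnt, offset);

    if (io_submit(ctx, 1, cbs) == 1 &&
        io_getevents(ctx, 1, 1, &ev, NULL) == 1)
        ret = (long)ev.res;    /* byte count, or negative errno */

    io_destroy(ctx);
    return ret;
}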
Signed-off-by: Andrea Arcangeli

Index: block.c
===================================================================
--- block.c	(revision 5799)
+++ block.c	(working copy)
@@ -53,6 +53,20 @@
                         uint8_t *buf, int nb_sectors);
 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
                          const uint8_t *buf, int nb_sectors);
+static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
+                                           int64_t sector_num,
+                                           struct iovec *iov,
+                                           int iovcnt,
+                                           size_t len,
+                                           BlockDriverCompletionFunc *cb,
+                                           void *opaque);
+static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
+                                            int64_t sector_num,
+                                            struct iovec *iov,
+                                            int iovcnt,
+                                            size_t len,
+                                            BlockDriverCompletionFunc *cb,
+                                            void *opaque);
 
 BlockDriverState *bdrv_first;
 
@@ -135,6 +149,8 @@
         /* add synchronous IO emulation layer */
         bdrv->bdrv_read = bdrv_read_em;
         bdrv->bdrv_write = bdrv_write_em;
+        bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
+        bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
     }
     bdrv->next = first_drv;
     first_drv = bdrv;
@@ -1341,6 +1401,74 @@
     qemu_aio_release(acb);
 }
 
+static void bdrv_aio_iov_bh_cb(void *opaque)
+{
+    BlockDriverAIOCBSync *acb = opaque;
+    acb->common.cb(acb->common.opaque, acb->ret);
+    qemu_bh_delete(acb->bh);
+    qemu_free(acb);
+}
+
+static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
+                                           int64_t sector_num,
+                                           struct iovec *iov,
+                                           int iovcnt,
+                                           size_t len,
+                                           BlockDriverCompletionFunc *cb,
+                                           void *opaque)
+{
+    BlockDriverAIOCBSync *acb;
+    int ret = -1, idx;
+
+    for (idx = 0; idx < iovcnt; idx++) {
+        size_t sectors = iov[idx].iov_len >> SECTOR_BITS;
+        ret = bdrv_read(bs, sector_num, iov[idx].iov_base, sectors);
+        if (ret)
+            break;
+        sector_num += sectors;
+    }
+    acb = qemu_mallocz(sizeof(BlockDriverAIOCBSync));
+    if (!acb)
+        return NULL;
+    acb->common.bs = bs;
+    acb->common.cb = cb;
+    acb->common.opaque = opaque;
+    acb->bh = qemu_bh_new(bdrv_aio_iov_bh_cb, acb);
+    acb->ret = ret;
+    qemu_bh_schedule(acb->bh);
+    return &acb->common;
+}
+
+static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
+                                            int64_t sector_num,
+                                            struct iovec *iov,
+                                            int iovcnt,
+                                            size_t len,
+                                            BlockDriverCompletionFunc *cb,
+                                            void *opaque)
+{
+    BlockDriverAIOCBSync *acb;
+    int ret = -1, idx;
+
+    for (idx = 0; idx < iovcnt; idx++) {
+        size_t sectors = iov[idx].iov_len >> SECTOR_BITS;
+        ret = bdrv_write(bs, sector_num, iov[idx].iov_base, sectors);
+        if (ret)
+            break;
+        sector_num += sectors;
+    }
+    acb = qemu_mallocz(sizeof(BlockDriverAIOCBSync));
+    if (!acb)
+        return NULL;
+    acb->common.bs = bs;
+    acb->common.cb = cb;
+    acb->common.opaque = opaque;
+    acb->bh = qemu_bh_new(bdrv_aio_iov_bh_cb, acb);
+    acb->ret = ret;
+    qemu_bh_schedule(acb->bh);
+    return &acb->common;
+}
+
 /**************************************************************/
 /* sync block device emulation */
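For completeness, a hypothetical caller of the new entry points might look
like the sketch below. bdrv_aio_readv() is assumed here to be a wrapper with
the same arguments as the _em emulation above (the actual caller would be
the dma api from the previous patch), and the buffer sizes and callback are
purely illustrative.

/* Hypothetical caller sketch, not part of the patch: builds a two-element
 * scatter list and submits it through an assumed bdrv_aio_readv() wrapper
 * that falls back to bdrv_aio_readv_em when the backend has no native aio. */
static void example_read_done(void *opaque, int ret)
{
    if (ret)
        fprintf(stderr, "aio readv failed: %d\n", ret);
}

static void example_submit(BlockDriverState *bs,
                           uint8_t *buf0, uint8_t *buf1)
{
    static struct iovec iov[2];

    iov[0].iov_base = buf0;
    iov[0].iov_len  = 4096;    /* each element a multiple of the sector size */
    iov[1].iov_base = buf1;
    iov[1].iov_len  = 4096;

    /* read 16 sectors (2 * 4096 / 512) starting at sector 0 */
    bdrv_aio_readv(bs, 0, iov, 2, 8192, example_read_done, NULL);
}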