From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from mailman by lists.gnu.org with tmda-scanned (Exim 4.43) id 1KhxvB-00078a-Pl for qemu-devel@nongnu.org; Mon, 22 Sep 2008 22:46:37 -0400 Received: from exim by lists.gnu.org with spam-scanned (Exim 4.43) id 1KhxvA-00078O-9A for qemu-devel@nongnu.org; Mon, 22 Sep 2008 22:46:36 -0400 Received: from [199.232.76.173] (port=40410 helo=monty-python.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1KhxvA-00078L-4E for qemu-devel@nongnu.org; Mon, 22 Sep 2008 22:46:36 -0400 Received: from e5.ny.us.ibm.com ([32.97.182.145]:40255) by monty-python.gnu.org with esmtps (TLS-1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.60) (envelope-from ) id 1Khxv9-0005Cc-JE for qemu-devel@nongnu.org; Mon, 22 Sep 2008 22:46:35 -0400 Received: from d01relay04.pok.ibm.com (d01relay04.pok.ibm.com [9.56.227.236]) by e5.ny.us.ibm.com (8.13.8/8.13.8) with ESMTP id m8N2kTlO010030 for ; Mon, 22 Sep 2008 22:46:29 -0400 Received: from d01av01.pok.ibm.com (d01av01.pok.ibm.com [9.56.224.215]) by d01relay04.pok.ibm.com (8.13.8/8.13.8/NCO v9.1) with ESMTP id m8N2kTUj231216 for ; Mon, 22 Sep 2008 22:46:29 -0400 Received: from d01av01.pok.ibm.com (loopback [127.0.0.1]) by d01av01.pok.ibm.com (8.12.11.20060308/8.13.3) with ESMTP id m8N2kT0K008895 for ; Mon, 22 Sep 2008 22:46:29 -0400 Message-ID: <48D85849.2080302@us.ibm.com> Date: Mon, 22 Sep 2008 21:45:29 -0500 From: Anthony Liguori MIME-Version: 1.0 References: <1222125454-21744-1-git-send-email-ryanh@us.ibm.com> <1222125454-21744-3-git-send-email-ryanh@us.ibm.com> In-Reply-To: <1222125454-21744-3-git-send-email-ryanh@us.ibm.com> Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 7bit Subject: [Qemu-devel] Re: [PATCH 2/3] Move aio implementation out of raw block driver Reply-To: qemu-devel@nongnu.org List-Id: qemu-devel.nongnu.org List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Ryan Harper Cc: Marcelo Tosatti , qemu-devel@nongnu.org, kvm@vger.kernel.org Ryan Harper wrote: > This patch moves the existing posix aio implementation out of block-raw-posix.c > into aio-posix.c. Added in a per-block device aio driver abstraction. > Block-raw-posix invokes the aio driver methods, .submit, .flush, and .cancel as > needed. aio-posix.c contains the posix aio implementation. > > The changes pave the way for other aio implementations, namely linux aio. Each > block device will init the proper aio driver depending on how the device is > opened. > > Signed-off-by: Ryan Harper > > diff --git a/Makefile b/Makefile > index de6393e..18477ba 100644 > --- a/Makefile > +++ b/Makefile > @@ -60,7 +60,7 @@ BLOCK_OBJS += block-raw-posix.o > endif > > ifdef CONFIG_AIO > -BLOCK_OBJS += compatfd.o > +BLOCK_OBJS += compatfd.o aio-posix.o > endif > > ###################################################################### > diff --git a/Makefile.target b/Makefile.target > index 4a490f4..4c6b3d5 100644 > --- a/Makefile.target > +++ b/Makefile.target > @@ -482,7 +482,7 @@ OBJS+=block-raw-posix.o > endif > > ifdef CONFIG_AIO > -OBJS+=compatfd.o > +OBJS+=compatfd.o aio-posix.o > endif > > LIBS+=-lz > diff --git a/block-aio.h b/block-aio.h > new file mode 100644 > index 0000000..b8597d0 > --- /dev/null > +++ b/block-aio.h > @@ -0,0 +1,78 @@ > +/* > + * QEMU Block AIO API > + * > + * Copyright IBM, Corp. 2008 > + * > + * Authors: > + * Anthony Liguori > + * Ryan Harper > + * > + * This work is licensed under the terms of the GNU GPL, version 2. See > + * the COPYING file in the top-level directory. > + * > + */ > + > +#ifndef QEMU_BLOCK_AIO_H > +#define QEMU_BLOCK_AIO_H > + > +#include "qemu-common.h" > +#include "block_int.h" > +#include "block.h" > +#include "qemu-aio.h" > +#ifdef CONFIG_AIO > +#include > +#endif > + > +//#define DEBUG_BLOCK_AIO > +#if defined(DEBUG_BLOCK_AIO) > +#define BLPRINTF(formatCstr, args...) do { fprintf(stderr, formatCstr, ##args); fflush(stderr); } while (0) > This is GCC syntax. You should use C99 syntax. That would be: #define BLPRINTF(fmt, ...) do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0) stderr is defined to not be buffered so an explicit fflush is unnecessary. formatCstr is also a goofy name :-) > +#else > +#define BLPRINTF(formatCstr, args...) > should be defined to do { } while (0) to maintain statement semantics. > +#endif > + > +typedef struct RawAIOCB { > + BlockDriverAIOCB common; > + struct aiocb posix_aiocb; > + struct RawAIOCB *next; > + int ret; > +} RawAIOCB; > The whole small-object allocator for AIOCBs seems a bit of a premature optimization to me. It makes this whole thing terribly awkward. Marcelo had a patch at one point to switch the small object allocate to just malloc/free. Marcelo: any reason you didn't follow up with that patch? > +typedef struct AIODriver > +{ > + const char *name; > + RawAIOCB *(*submit)(BlockDriverState *bs, int fd, > + int64_t sector_num, uint8_t *buf, > + int sectors, int write, > + BlockDriverCompletionFunc *cb, > + void *opaque); > + void (*cancel)(BlockDriverAIOCB *aiocb); > + int (*flush)(void *opaque); > +} AIODriver; > I think the AIODriver interface should just take an fd, a completion function, and an opaque pointer. It should have to have knowledge of BDRVRawState or BlockDriverState. > @@ -619,13 +436,14 @@ static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs, > } > #endif > > - acb = raw_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque); > - if (!acb) > + if (fd_open(bs) < 0) > return NULL; > - if (aio_read(&acb->aiocb) < 0) { > - qemu_aio_release(acb); > + > + /* submit read */ > + acb = s->aio_dvr->submit(bs, s->fd, sector_num, buf, nb_sectors, 0, cb, > + opaque); > + if (!acb) > return NULL; > - } > return &acb->common; > } > So what happens if !defined(CONFIG_AIO)? By my reading of the code, aio_drv will be NULL and this will SEGV. > @@ -634,13 +452,13 @@ static BlockDriverAIOCB *raw_aio_write(BlockDriverState *bs, > BlockDriverCompletionFunc *cb, void *opaque) > { > RawAIOCB *acb; > + BDRVRawState *s = bs->opaque; > > /* > * If O_DIRECT is used and the buffer is not aligned fall back > * to synchronous IO. > */ > #if defined(O_DIRECT) > - BDRVRawState *s = bs->opaque; > > if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) { > QEMUBH *bh; > @@ -652,48 +470,19 @@ static BlockDriverAIOCB *raw_aio_write(BlockDriverState *bs, > } > #endif > > - acb = raw_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque); > + /* submit write */ > + acb = s->aio_dvr->submit(bs, s->fd, sector_num, buf, nb_sectors, 1, cb, > + opaque); > if (!acb) > return NULL; > - if (aio_write(&acb->aiocb) < 0) { > - qemu_aio_release(acb); > - return NULL; > - } > return &acb->common; > } > > static void raw_aio_cancel(BlockDriverAIOCB *blockacb) > { > - int ret; > - RawAIOCB *acb = (RawAIOCB *)blockacb; > - RawAIOCB **pacb; > - > - ret = aio_cancel(acb->aiocb.aio_fildes, &acb->aiocb); > - if (ret == AIO_NOTCANCELED) { > - /* fail safe: if the aio could not be canceled, we wait for > - it */ > - while (aio_error(&acb->aiocb) == EINPROGRESS); > - } > - > - /* remove the callback from the queue */ > - pacb = &posix_aio_state->first_aio; > - for(;;) { > - if (*pacb == NULL) { > - break; > - } else if (*pacb == acb) { > - *pacb = acb->next; > - qemu_aio_release(acb); > - break; > - } > - pacb = &acb->next; > - } > -} > - > -#else /* CONFIG_AIO */ > -static int posix_aio_init(void) > -{ > + BDRVRawState *s = blockacb->bs->opaque; > + s->aio_dvr->cancel(blockacb); > } > -#endif /* CONFIG_AIO */ > > static void raw_close(BlockDriverState *bs) > { > @@ -898,8 +687,6 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags) > BDRVRawState *s = bs->opaque; > int fd, open_flags, ret; > > - posix_aio_init(); > - > #ifdef CONFIG_COCOA > if (strstart(filename, "/dev/cdrom", NULL)) { > kern_return_t kernResult; > @@ -969,6 +756,8 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags) > s->fd_media_changed = 1; > } > #endif > + /* init aio driver for this block device */ > + s->aio_dvr = posix_aio_init(); > Doesn't this need to be conditional on CONFIG_AIO? Regards, Anthony Liguori > return 0; > } > >