From mboxrd@z Thu Jan 1 00:00:00 1970 From: Anthony Liguori Subject: Re: [PATCH 2/3] Move aio implementation out of raw block driver Date: Mon, 22 Sep 2008 21:45:29 -0500 Message-ID: <48D85849.2080302@us.ibm.com> References: <1222125454-21744-1-git-send-email-ryanh@us.ibm.com> <1222125454-21744-3-git-send-email-ryanh@us.ibm.com> Mime-Version: 1.0 Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 7bit Cc: qemu-devel@nongnu.org, kvm@vger.kernel.org, Marcelo Tosatti To: Ryan Harper Return-path: Received: from e1.ny.us.ibm.com ([32.97.182.141]:41815 "EHLO e1.ny.us.ibm.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1754194AbYIWCqa (ORCPT ); Mon, 22 Sep 2008 22:46:30 -0400 Received: from d01relay02.pok.ibm.com (d01relay02.pok.ibm.com [9.56.227.234]) by e1.ny.us.ibm.com (8.13.8/8.13.8) with ESMTP id m8N2kTJ0017763 for ; Mon, 22 Sep 2008 22:46:29 -0400 Received: from d01av01.pok.ibm.com (d01av01.pok.ibm.com [9.56.224.215]) by d01relay02.pok.ibm.com (8.13.8/8.13.8/NCO v9.1) with ESMTP id m8N2kTJf270224 for ; Mon, 22 Sep 2008 22:46:29 -0400 Received: from d01av01.pok.ibm.com (loopback [127.0.0.1]) by d01av01.pok.ibm.com (8.12.11.20060308/8.13.3) with ESMTP id m8N2kT0Q008895 for ; Mon, 22 Sep 2008 22:46:29 -0400 In-Reply-To: <1222125454-21744-3-git-send-email-ryanh@us.ibm.com> Sender: kvm-owner@vger.kernel.org List-ID: Ryan Harper wrote: > This patch moves the existing posix aio implementation out of block-raw-posix.c > into aio-posix.c. Added in a per-block device aio driver abstraction. > Block-raw-posix invokes the aio driver methods, .submit, .flush, and .cancel as > needed. aio-posix.c contains the posix aio implementation. > > The changes pave the way for other aio implementations, namely linux aio. Each > block device will init the proper aio driver depending on how the device is > opened. > > Signed-off-by: Ryan Harper > > diff --git a/Makefile b/Makefile > index de6393e..18477ba 100644 > --- a/Makefile > +++ b/Makefile > @@ -60,7 +60,7 @@ BLOCK_OBJS += block-raw-posix.o > endif > > ifdef CONFIG_AIO > -BLOCK_OBJS += compatfd.o > +BLOCK_OBJS += compatfd.o aio-posix.o > endif > > ###################################################################### > diff --git a/Makefile.target b/Makefile.target > index 4a490f4..4c6b3d5 100644 > --- a/Makefile.target > +++ b/Makefile.target > @@ -482,7 +482,7 @@ OBJS+=block-raw-posix.o > endif > > ifdef CONFIG_AIO > -OBJS+=compatfd.o > +OBJS+=compatfd.o aio-posix.o > endif > > LIBS+=-lz > diff --git a/block-aio.h b/block-aio.h > new file mode 100644 > index 0000000..b8597d0 > --- /dev/null > +++ b/block-aio.h > @@ -0,0 +1,78 @@ > +/* > + * QEMU Block AIO API > + * > + * Copyright IBM, Corp. 2008 > + * > + * Authors: > + * Anthony Liguori > + * Ryan Harper > + * > + * This work is licensed under the terms of the GNU GPL, version 2. See > + * the COPYING file in the top-level directory. > + * > + */ > + > +#ifndef QEMU_BLOCK_AIO_H > +#define QEMU_BLOCK_AIO_H > + > +#include "qemu-common.h" > +#include "block_int.h" > +#include "block.h" > +#include "qemu-aio.h" > +#ifdef CONFIG_AIO > +#include > +#endif > + > +//#define DEBUG_BLOCK_AIO > +#if defined(DEBUG_BLOCK_AIO) > +#define BLPRINTF(formatCstr, args...) do { fprintf(stderr, formatCstr, ##args); fflush(stderr); } while (0) > This is GCC syntax. You should use C99 syntax. That would be: #define BLPRINTF(fmt, ...) do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0) stderr is defined to not be buffered so an explicit fflush is unnecessary. formatCstr is also a goofy name :-) > +#else > +#define BLPRINTF(formatCstr, args...) > should be defined to do { } while (0) to maintain statement semantics. > +#endif > + > +typedef struct RawAIOCB { > + BlockDriverAIOCB common; > + struct aiocb posix_aiocb; > + struct RawAIOCB *next; > + int ret; > +} RawAIOCB; > The whole small-object allocator for AIOCBs seems a bit of a premature optimization to me. It makes this whole thing terribly awkward. Marcelo had a patch at one point to switch the small object allocate to just malloc/free. Marcelo: any reason you didn't follow up with that patch? > +typedef struct AIODriver > +{ > + const char *name; > + RawAIOCB *(*submit)(BlockDriverState *bs, int fd, > + int64_t sector_num, uint8_t *buf, > + int sectors, int write, > + BlockDriverCompletionFunc *cb, > + void *opaque); > + void (*cancel)(BlockDriverAIOCB *aiocb); > + int (*flush)(void *opaque); > +} AIODriver; > I think the AIODriver interface should just take an fd, a completion function, and an opaque pointer. It should have to have knowledge of BDRVRawState or BlockDriverState. > @@ -619,13 +436,14 @@ static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs, > } > #endif > > - acb = raw_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque); > - if (!acb) > + if (fd_open(bs) < 0) > return NULL; > - if (aio_read(&acb->aiocb) < 0) { > - qemu_aio_release(acb); > + > + /* submit read */ > + acb = s->aio_dvr->submit(bs, s->fd, sector_num, buf, nb_sectors, 0, cb, > + opaque); > + if (!acb) > return NULL; > - } > return &acb->common; > } > So what happens if !defined(CONFIG_AIO)? By my reading of the code, aio_drv will be NULL and this will SEGV. > @@ -634,13 +452,13 @@ static BlockDriverAIOCB *raw_aio_write(BlockDriverState *bs, > BlockDriverCompletionFunc *cb, void *opaque) > { > RawAIOCB *acb; > + BDRVRawState *s = bs->opaque; > > /* > * If O_DIRECT is used and the buffer is not aligned fall back > * to synchronous IO. > */ > #if defined(O_DIRECT) > - BDRVRawState *s = bs->opaque; > > if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) { > QEMUBH *bh; > @@ -652,48 +470,19 @@ static BlockDriverAIOCB *raw_aio_write(BlockDriverState *bs, > } > #endif > > - acb = raw_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque); > + /* submit write */ > + acb = s->aio_dvr->submit(bs, s->fd, sector_num, buf, nb_sectors, 1, cb, > + opaque); > if (!acb) > return NULL; > - if (aio_write(&acb->aiocb) < 0) { > - qemu_aio_release(acb); > - return NULL; > - } > return &acb->common; > } > > static void raw_aio_cancel(BlockDriverAIOCB *blockacb) > { > - int ret; > - RawAIOCB *acb = (RawAIOCB *)blockacb; > - RawAIOCB **pacb; > - > - ret = aio_cancel(acb->aiocb.aio_fildes, &acb->aiocb); > - if (ret == AIO_NOTCANCELED) { > - /* fail safe: if the aio could not be canceled, we wait for > - it */ > - while (aio_error(&acb->aiocb) == EINPROGRESS); > - } > - > - /* remove the callback from the queue */ > - pacb = &posix_aio_state->first_aio; > - for(;;) { > - if (*pacb == NULL) { > - break; > - } else if (*pacb == acb) { > - *pacb = acb->next; > - qemu_aio_release(acb); > - break; > - } > - pacb = &acb->next; > - } > -} > - > -#else /* CONFIG_AIO */ > -static int posix_aio_init(void) > -{ > + BDRVRawState *s = blockacb->bs->opaque; > + s->aio_dvr->cancel(blockacb); > } > -#endif /* CONFIG_AIO */ > > static void raw_close(BlockDriverState *bs) > { > @@ -898,8 +687,6 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags) > BDRVRawState *s = bs->opaque; > int fd, open_flags, ret; > > - posix_aio_init(); > - > #ifdef CONFIG_COCOA > if (strstart(filename, "/dev/cdrom", NULL)) { > kern_return_t kernResult; > @@ -969,6 +756,8 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags) > s->fd_media_changed = 1; > } > #endif > + /* init aio driver for this block device */ > + s->aio_dvr = posix_aio_init(); > Doesn't this need to be conditional on CONFIG_AIO? Regards, Anthony Liguori > return 0; > } > >