From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from mailman by lists.gnu.org with tmda-scanned (Exim 4.43) id 1KhugZ-0002yN-V3 for qemu-devel@nongnu.org; Mon, 22 Sep 2008 19:19:20 -0400 Received: from exim by lists.gnu.org with spam-scanned (Exim 4.43) id 1KhugZ-0002yB-5Y for qemu-devel@nongnu.org; Mon, 22 Sep 2008 19:19:19 -0400 Received: from [199.232.76.173] (port=44570 helo=monty-python.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1KhugZ-0002y8-17 for qemu-devel@nongnu.org; Mon, 22 Sep 2008 19:19:19 -0400 Received: from e5.ny.us.ibm.com ([32.97.182.145]:48231) by monty-python.gnu.org with esmtps (TLS-1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.60) (envelope-from ) id 1KhugY-00055I-Hk for qemu-devel@nongnu.org; Mon, 22 Sep 2008 19:19:18 -0400 Received: from d01relay02.pok.ibm.com (d01relay02.pok.ibm.com [9.56.227.234]) by e5.ny.us.ibm.com (8.13.8/8.13.8) with ESMTP id m8MNHal2024548 for ; Mon, 22 Sep 2008 19:17:36 -0400 Received: from d01av04.pok.ibm.com (d01av04.pok.ibm.com [9.56.224.64]) by d01relay02.pok.ibm.com (8.13.8/8.13.8/NCO v9.1) with ESMTP id m8MNHaXI281634 for ; Mon, 22 Sep 2008 19:17:36 -0400 Received: from d01av04.pok.ibm.com (loopback [127.0.0.1]) by d01av04.pok.ibm.com (8.12.11.20060308/8.13.3) with ESMTP id m8MNHaUU031655 for ; Mon, 22 Sep 2008 19:17:36 -0400 From: Ryan Harper Date: Mon, 22 Sep 2008 18:17:33 -0500 Message-Id: <1222125454-21744-3-git-send-email-ryanh@us.ibm.com> In-Reply-To: <1222125454-21744-1-git-send-email-ryanh@us.ibm.com> References: <1222125454-21744-1-git-send-email-ryanh@us.ibm.com> Subject: [Qemu-devel] [PATCH 2/3] Move aio implementation out of raw block driver Reply-To: qemu-devel@nongnu.org List-Id: qemu-devel.nongnu.org List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: qemu-devel@nongnu.org Cc: aliguori@us.ibm.com, Ryan Harper , kvm@vger.kernel.org This patch moves the existing posix aio implementation out of block-raw-posix.c into aio-posix.c. 
It also adds a per-block-device aio driver abstraction: block-raw-posix.c invokes the aio driver methods (.submit, .flush, and .cancel) as needed, while aio-posix.c contains the POSIX aio implementation. These changes pave the way for other aio implementations, namely Linux aio. Each block device initializes the appropriate aio driver depending on how the device is opened. Signed-off-by: Ryan Harper diff --git a/Makefile b/Makefile index de6393e..18477ba 100644 --- a/Makefile +++ b/Makefile @@ -60,7 +60,7 @@ BLOCK_OBJS += block-raw-posix.o endif ifdef CONFIG_AIO -BLOCK_OBJS += compatfd.o +BLOCK_OBJS += compatfd.o aio-posix.o endif ###################################################################### diff --git a/Makefile.target b/Makefile.target index 4a490f4..4c6b3d5 100644 --- a/Makefile.target +++ b/Makefile.target @@ -482,7 +482,7 @@ OBJS+=block-raw-posix.o endif ifdef CONFIG_AIO -OBJS+=compatfd.o +OBJS+=compatfd.o aio-posix.o endif LIBS+=-lz diff --git a/block-aio.h b/block-aio.h new file mode 100644 index 0000000..b8597d0 --- /dev/null +++ b/block-aio.h @@ -0,0 +1,78 @@ +/* + * QEMU Block AIO API + * + * Copyright IBM, Corp. 2008 + * + * Authors: + * Anthony Liguori + * Ryan Harper + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#ifndef QEMU_BLOCK_AIO_H +#define QEMU_BLOCK_AIO_H + +#include "qemu-common.h" +#include "block_int.h" +#include "block.h" +#include "qemu-aio.h" +#ifdef CONFIG_AIO +#include +#endif + +//#define DEBUG_BLOCK_AIO +#if defined(DEBUG_BLOCK_AIO) +#define BLPRINTF(formatCstr, args...) do { fprintf(stderr, formatCstr, ##args); fflush(stderr); } while (0) +#else +#define BLPRINTF(formatCstr, args...) 
+#endif + +typedef struct RawAIOCB { + BlockDriverAIOCB common; + struct aiocb posix_aiocb; + struct RawAIOCB *next; + int ret; +} RawAIOCB; + +typedef struct AIODriver +{ + const char *name; + RawAIOCB *(*submit)(BlockDriverState *bs, int fd, + int64_t sector_num, uint8_t *buf, + int sectors, int write, + BlockDriverCompletionFunc *cb, + void *opaque); + void (*cancel)(BlockDriverAIOCB *aiocb); + int (*flush)(void *opaque); +} AIODriver; + +typedef struct BDRVRawState { + int fd; + int type; + unsigned int lseek_err_cnt; +#if defined(__linux__) + /* linux floppy specific */ + int fd_open_flags; + int64_t fd_open_time; + int64_t fd_error_time; + int fd_got_error; + int fd_media_changed; +#endif +#if defined(O_DIRECT) + uint8_t* aligned_buf; +#endif + AIODriver *aio_dvr; +} BDRVRawState; + +typedef struct AIOState +{ + int fd; + RawAIOCB *first_aio; +} AIOState; + +AIODriver* posix_aio_init(void); + +#endif /* QEMU_BLOCK_AIO_H */ diff --git a/block-raw-posix.c b/block-raw-posix.c index 41f9976..cab7094 100644 --- a/block-raw-posix.c +++ b/block-raw-posix.c @@ -25,11 +25,8 @@ #include "qemu-timer.h" #include "qemu-char.h" #include "block_int.h" -#include "compatfd.h" +#include "block-aio.h" #include -#ifdef CONFIG_AIO -#include -#endif #ifdef CONFIG_COCOA #include @@ -84,25 +81,6 @@ reopen it to see if the disk has been changed */ #define FD_OPEN_TIMEOUT 1000 -typedef struct BDRVRawState { - int fd; - int type; - unsigned int lseek_err_cnt; -#if defined(__linux__) - /* linux floppy specific */ - int fd_open_flags; - int64_t fd_open_time; - int64_t fd_error_time; - int fd_got_error; - int fd_media_changed; -#endif -#if defined(O_DIRECT) - uint8_t* aligned_buf; -#endif -} BDRVRawState; - -static int posix_aio_init(void); - static int fd_open(BlockDriverState *bs); static int raw_open(BlockDriverState *bs, const char *filename, int flags) @@ -110,8 +88,6 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags) BDRVRawState *s = bs->opaque; int fd, 
open_flags, ret; - posix_aio_init(); - s->lseek_err_cnt = 0; open_flags = O_BINARY; @@ -149,6 +125,8 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags) } } #endif + /* init aio driver for this block device */ + s->aio_dvr = posix_aio_init(); return 0; } @@ -429,166 +407,6 @@ static int raw_pwrite(BlockDriverState *bs, int64_t offset, #define raw_pwrite raw_pwrite_aligned #endif - -#ifdef CONFIG_AIO -/***********************************************************/ -/* Unix AIO using POSIX AIO */ - -typedef struct RawAIOCB { - BlockDriverAIOCB common; - struct aiocb aiocb; - struct RawAIOCB *next; - int ret; -} RawAIOCB; - -typedef struct PosixAioState -{ - int fd; - RawAIOCB *first_aio; -} PosixAioState; - -static void posix_aio_read(void *opaque) -{ - PosixAioState *s = opaque; - RawAIOCB *acb, **pacb; - int ret; - size_t offset; - union { - struct qemu_signalfd_siginfo siginfo; - char buf[128]; - } sig; - - /* try to read from signalfd, don't freak out if we can't read anything */ - offset = 0; - while (offset < 128) { - ssize_t len; - - len = read(s->fd, sig.buf + offset, 128 - offset); - if (len == -1 && errno == EINTR) - continue; - if (len == -1 && errno == EAGAIN) { - /* there is no natural reason for this to happen, - * so we'll spin hard until we get everything just - * to be on the safe side. 
*/ - if (offset > 0) - continue; - } - - offset += len; - } - - for(;;) { - pacb = &s->first_aio; - for(;;) { - acb = *pacb; - if (!acb) - goto the_end; - ret = aio_error(&acb->aiocb); - if (ret == ECANCELED) { - /* remove the request */ - *pacb = acb->next; - qemu_aio_release(acb); - } else if (ret != EINPROGRESS) { - /* end of aio */ - if (ret == 0) { - ret = aio_return(&acb->aiocb); - if (ret == acb->aiocb.aio_nbytes) - ret = 0; - else - ret = -EINVAL; - } else { - ret = -ret; - } - /* remove the request */ - *pacb = acb->next; - /* call the callback */ - acb->common.cb(acb->common.opaque, ret); - qemu_aio_release(acb); - break; - } else { - pacb = &acb->next; - } - } - } - the_end: ; -} - -static int posix_aio_flush(void *opaque) -{ - PosixAioState *s = opaque; - return !!s->first_aio; -} - -static PosixAioState *posix_aio_state; - -static int posix_aio_init(void) -{ - sigset_t mask; - PosixAioState *s; - - if (posix_aio_state) - return 0; - - s = qemu_malloc(sizeof(PosixAioState)); - if (s == NULL) - return -ENOMEM; - - /* Make sure to block AIO signal */ - sigemptyset(&mask); - sigaddset(&mask, SIGUSR2); - sigprocmask(SIG_BLOCK, &mask, NULL); - - s->first_aio = NULL; - s->fd = qemu_signalfd(&mask); - - fcntl(s->fd, F_SETFL, O_NONBLOCK); - - qemu_aio_set_fd_handler(s->fd, posix_aio_read, NULL, posix_aio_flush, s); - -#if defined(__GLIBC__) && defined(__linux__) - { - /* XXX: aio thread exit seems to hang on RedHat 9 and this init - seems to fix the problem. 
*/ - struct aioinit ai; - memset(&ai, 0, sizeof(ai)); - ai.aio_threads = 1; - ai.aio_num = 1; - ai.aio_idle_time = 365 * 100000; - aio_init(&ai); - } -#endif - posix_aio_state = s; - - return 0; -} - -static RawAIOCB *raw_aio_setup(BlockDriverState *bs, - int64_t sector_num, uint8_t *buf, int nb_sectors, - BlockDriverCompletionFunc *cb, void *opaque) -{ - BDRVRawState *s = bs->opaque; - RawAIOCB *acb; - - if (fd_open(bs) < 0) - return NULL; - - acb = qemu_aio_get(bs, cb, opaque); - if (!acb) - return NULL; - acb->aiocb.aio_fildes = s->fd; - acb->aiocb.aio_sigevent.sigev_signo = SIGUSR2; - acb->aiocb.aio_sigevent.sigev_notify = SIGEV_SIGNAL; - acb->aiocb.aio_buf = buf; - if (nb_sectors < 0) - acb->aiocb.aio_nbytes = -nb_sectors; - else - acb->aiocb.aio_nbytes = nb_sectors * 512; - acb->aiocb.aio_offset = sector_num * 512; - acb->next = posix_aio_state->first_aio; - posix_aio_state->first_aio = acb; - return acb; -} - static void raw_aio_em_cb(void* opaque) { RawAIOCB *acb = opaque; @@ -601,14 +419,13 @@ static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs, BlockDriverCompletionFunc *cb, void *opaque) { RawAIOCB *acb; + BDRVRawState *s = bs->opaque; /* * If O_DIRECT is used and the buffer is not aligned fall back * to synchronous IO. 
*/ #if defined(O_DIRECT) - BDRVRawState *s = bs->opaque; - if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) { QEMUBH *bh; acb = qemu_aio_get(bs, cb, opaque); @@ -619,13 +436,14 @@ static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs, } #endif - acb = raw_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque); - if (!acb) + if (fd_open(bs) < 0) return NULL; - if (aio_read(&acb->aiocb) < 0) { - qemu_aio_release(acb); + + /* submit read */ + acb = s->aio_dvr->submit(bs, s->fd, sector_num, buf, nb_sectors, 0, cb, + opaque); + if (!acb) return NULL; - } return &acb->common; } @@ -634,13 +452,13 @@ static BlockDriverAIOCB *raw_aio_write(BlockDriverState *bs, BlockDriverCompletionFunc *cb, void *opaque) { RawAIOCB *acb; + BDRVRawState *s = bs->opaque; /* * If O_DIRECT is used and the buffer is not aligned fall back * to synchronous IO. */ #if defined(O_DIRECT) - BDRVRawState *s = bs->opaque; if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) { QEMUBH *bh; @@ -652,48 +470,19 @@ static BlockDriverAIOCB *raw_aio_write(BlockDriverState *bs, } #endif - acb = raw_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque); + /* submit write */ + acb = s->aio_dvr->submit(bs, s->fd, sector_num, buf, nb_sectors, 1, cb, + opaque); if (!acb) return NULL; - if (aio_write(&acb->aiocb) < 0) { - qemu_aio_release(acb); - return NULL; - } return &acb->common; } static void raw_aio_cancel(BlockDriverAIOCB *blockacb) { - int ret; - RawAIOCB *acb = (RawAIOCB *)blockacb; - RawAIOCB **pacb; - - ret = aio_cancel(acb->aiocb.aio_fildes, &acb->aiocb); - if (ret == AIO_NOTCANCELED) { - /* fail safe: if the aio could not be canceled, we wait for - it */ - while (aio_error(&acb->aiocb) == EINPROGRESS); - } - - /* remove the callback from the queue */ - pacb = &posix_aio_state->first_aio; - for(;;) { - if (*pacb == NULL) { - break; - } else if (*pacb == acb) { - *pacb = acb->next; - qemu_aio_release(acb); - break; - } - pacb = &acb->next; - } -} - -#else /* 
CONFIG_AIO */ -static int posix_aio_init(void) -{ + BDRVRawState *s = blockacb->bs->opaque; + s->aio_dvr->cancel(blockacb); } -#endif /* CONFIG_AIO */ static void raw_close(BlockDriverState *bs) { @@ -898,8 +687,6 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags) BDRVRawState *s = bs->opaque; int fd, open_flags, ret; - posix_aio_init(); - #ifdef CONFIG_COCOA if (strstart(filename, "/dev/cdrom", NULL)) { kern_return_t kernResult; @@ -969,6 +756,8 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags) s->fd_media_changed = 1; } #endif + /* init aio driver for this block device */ + s->aio_dvr = posix_aio_init(); return 0; }