Subject: [PATCH 2/3] Move aio implementation out of raw block driver
Cc: aliguori@us.ibm.com
Cc: kvm@vger.kernel.org

This patch moves the existing posix aio implementation out of
block-raw-posix.c into aio-posix.c.  It adds a per-block-device aio driver
abstraction: block-raw-posix invokes the aio driver methods .submit, .flush,
and .cancel as needed, and aio-posix.c contains the posix aio implementation.

The changes pave the way for other aio implementations, namely linux aio.
Each block device will init the proper aio driver depending on how the
device is opened.

Signed-off-by: Ryan Harper

diff --git a/Makefile b/Makefile
index de6393e..18477ba 100644
--- a/Makefile
+++ b/Makefile
@@ -60,7 +60,7 @@ BLOCK_OBJS += block-raw-posix.o
 endif
 
 ifdef CONFIG_AIO
-BLOCK_OBJS += compatfd.o
+BLOCK_OBJS += compatfd.o aio-posix.o
 endif
 
 ######################################################################
diff --git a/Makefile.target b/Makefile.target
index 4a490f4..4c6b3d5 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -482,7 +482,7 @@ OBJS+=block-raw-posix.o
 endif
 
 ifdef CONFIG_AIO
-OBJS+=compatfd.o
+OBJS+=compatfd.o aio-posix.o
 endif
 
 LIBS+=-lz
diff --git a/aio-posix.c b/aio-posix.c
new file mode 100644
index 0000000..cd85420
--- /dev/null
+++ b/aio-posix.c
@@ -0,0 +1,313 @@
+/*
+ * Block driver for RAW files (posix)
+ *
+ * Copyright (c) 2006 Fabrice Bellard
+ * Copyright (c) 2008 IBM Corp.
+ * Authors: Anthony Liguori
+ *        : Ryan Harper
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu-common.h"
+#include "block_int.h"
+#include "block.h"
+#include "block-aio.h"
+#include "compatfd.h"
+#ifdef CONFIG_AIO
+#include <aio.h>
+#endif
+
+/* Unix AIO using POSIX AIO */
+static AIOState *posix_aio_state;
+typedef AIOState PosixAioState;
+/* NOTE(review): nothing in this file references posix_aio_dvr; the table
+ * returned by posix_aio_init() is posix_aio_drv.  Candidate for removal. */
+AIODriver posix_aio_dvr;
+
+/*
+ * Unlink acb from the pending list of s.  The walk advances through the
+ * node currently under inspection, so it terminates even when acb is not
+ * the list head.  Returns 1 if acb was found and removed, 0 otherwise.
+ */
+static int pa_unlink(PosixAioState *s, RawAIOCB *acb)
+{
+    RawAIOCB **pacb;
+
+    for (pacb = &s->first_aio; *pacb != NULL; pacb = &(*pacb)->next) {
+        if (*pacb == acb) {
+            *pacb = acb->next;
+            return 1;
+        }
+    }
+    return 0;
+}
+
+/*
+ * Read handler for the signalfd.  Reads up to sizeof(sig.buf) bytes from
+ * the descriptor, then reaps every request on the pending list that is no
+ * longer in progress: cancelled requests are released silently, finished
+ * ones have their callback invoked with 0 (full transfer), -EINVAL (short
+ * transfer) or the negated aio_error() code.
+ */
+static void pa_read(void *opaque)
+{
+    PosixAioState *s = opaque;
+    RawAIOCB *acb, **pacb;
+    int ret;
+    size_t offset;
+    union {
+        struct qemu_signalfd_siginfo siginfo;
+        char buf[128];
+    } sig;
+
+    BLPRINTF("%s ->\n", __FUNCTION__);
+    /* try to read from signalfd, don't freak out if we can't read anything */
+    offset = 0;
+    while (offset < sizeof(sig.buf)) {
+        ssize_t len;
+
+        len = read(s->fd, sig.buf + offset, sizeof(sig.buf) - offset);
+        if (len == -1 && errno == EINTR)
+            continue;
+        if (len == -1 && errno == EAGAIN && offset > 0) {
+            /* there is no natural reason for this to happen,
+             * so we'll spin hard until we get everything just
+             * to be on the safe side. */
+            continue;
+        }
+        /* nothing to read, EOF or a hard error: stop here rather than
+         * adding a negative length to the unsigned offset */
+        if (len <= 0)
+            break;
+
+        offset += len;
+    }
+
+    for(;;) {
+        pacb = &s->first_aio;
+        for(;;) {
+            acb = *pacb;
+            if (!acb)
+                goto the_end;
+            ret = aio_error(&acb->posix_aiocb);
+            if (ret == ECANCELED) {
+                /* remove the request */
+                *pacb = acb->next;
+                qemu_aio_release(acb);
+            } else if (ret != EINPROGRESS) {
+                /* end of aio */
+                if (ret == 0) {
+                    ret = aio_return(&acb->posix_aiocb);
+                    if (ret == acb->posix_aiocb.aio_nbytes)
+                        ret = 0;
+                    else
+                        ret = -EINVAL;
+                } else {
+                    ret = -ret;
+                }
+                /* remove the request */
+                *pacb = acb->next;
+                /* call the callback */
+                acb->common.cb(acb->common.opaque, ret);
+                qemu_aio_release(acb);
+                /* rescan from the head of the list after a callback */
+                break;
+            } else {
+                pacb = &acb->next;
+            }
+        }
+    }
+ the_end: ;
+    BLPRINTF("%s <-\n", __FUNCTION__);
+}
+
+/* Flush callback registered in pa_init(): 1 while requests are pending. */
+static int pa_flush(void *opaque)
+{
+    PosixAioState *s = opaque;
+    return !!s->first_aio;
+}
+
+/*
+ * One-time setup of the shared state: block SIGUSR2, obtain a non-blocking
+ * signalfd for it and register pa_read()/pa_flush() on that descriptor.
+ * Returns 0 on success or when already initialised, negative errno on error.
+ */
+static int pa_init(void)
+{
+    sigset_t mask;
+    PosixAioState *s;
+
+    BLPRINTF("%s ->\n", __FUNCTION__);
+    if (posix_aio_state)
+        return 0;
+
+    s = qemu_malloc(sizeof(PosixAioState));
+    if (s == NULL)
+        return -ENOMEM;
+
+    /* Make sure to block AIO signal */
+    sigemptyset(&mask);
+    sigaddset(&mask, SIGUSR2);
+    sigprocmask(SIG_BLOCK, &mask, NULL);
+
+    s->first_aio = NULL;
+    s->fd = qemu_signalfd(&mask);
+    if (s->fd < 0) {
+        /* no descriptor means completions could never be delivered */
+        int err = errno;
+
+        qemu_free(s);
+        return err ? -err : -EIO;
+    }
+
+    fcntl(s->fd, F_SETFL, O_NONBLOCK);
+
+    qemu_aio_set_fd_handler(s->fd, pa_read, NULL, pa_flush, s);
+
+#if defined(__GLIBC__) && defined(__linux__)
+    {
+        /* XXX: aio thread exit seems to hang on RedHat 9 and this init
+           seems to fix the problem. */
+        struct aioinit ai;
+        memset(&ai, 0, sizeof(ai));
+        ai.aio_threads = 1;
+        ai.aio_num = 1;
+        ai.aio_idle_time = 365 * 100000;
+        aio_init(&ai);
+    }
+#endif
+    posix_aio_state = s;
+
+    BLPRINTF("%s <-\n", __FUNCTION__);
+    return 0;
+}
+
+/*
+ * Allocate a request for bs, describe a transfer on fd in its POSIX control
+ * block (completion is signalled with SIGUSR2) and push it on the pending
+ * list.  A negative nb_sectors is a byte count; otherwise the length is
+ * nb_sectors * 512.  Returns NULL if qemu_aio_get() fails.
+ */
+static RawAIOCB *pa_setup(BlockDriverState *bs, int fd, int64_t sector_num,
+                          uint8_t *buf, int nb_sectors,
+                          BlockDriverCompletionFunc *cb, void *opaque)
+{
+    RawAIOCB *acb;
+
+    BLPRINTF("%s ->\n", __FUNCTION__);
+    acb = qemu_aio_get(bs, cb, opaque);
+    if (!acb)
+        return NULL;
+    /* use the descriptor the caller passed to the driver */
+    acb->posix_aiocb.aio_fildes = fd;
+    acb->posix_aiocb.aio_sigevent.sigev_signo = SIGUSR2;
+    acb->posix_aiocb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
+    acb->posix_aiocb.aio_buf = buf;
+    if (nb_sectors < 0)
+        acb->posix_aiocb.aio_nbytes = -nb_sectors;
+    else
+        acb->posix_aiocb.aio_nbytes = nb_sectors * 512;
+    acb->posix_aiocb.aio_offset = sector_num * 512;
+
+    acb->next = posix_aio_state->first_aio;
+    posix_aio_state->first_aio = acb;
+
+    BLPRINTF("%s <-\n", __FUNCTION__);
+    return acb;
+}
+
+/*
+ * AIODriver.cancel: cancel a request returned by pa_submit().  If it cannot
+ * be cancelled, busy-wait until it is no longer in progress.  The request
+ * is then unlinked and released; its callback is not invoked here.
+ */
+static void pa_cancel(BlockDriverAIOCB *blockacb)
+{
+    int ret;
+    RawAIOCB *acb = (RawAIOCB *)blockacb;
+
+    BLPRINTF("%s ->\n", __FUNCTION__);
+    ret = aio_cancel(acb->posix_aiocb.aio_fildes, &acb->posix_aiocb);
+    if (ret == AIO_NOTCANCELED) {
+        /* fail safe: if the aio could not be canceled, we wait for
+           it */
+        while (aio_error(&acb->posix_aiocb) == EINPROGRESS);
+    }
+
+    /* remove the callback from the queue */
+    if (pa_unlink(posix_aio_state, acb))
+        qemu_aio_release(acb);
+    BLPRINTF("%s <-\n", __FUNCTION__);
+}
+
+/*
+ * AIODriver.submit: start an asynchronous write (write != 0) or read
+ * (write == 0) described by the remaining arguments, see pa_setup().
+ * Returns the queued request, or NULL if it could not be set up or started.
+ */
+static RawAIOCB *pa_submit(BlockDriverState *bs, int fd,
+                           int64_t sector_num, uint8_t *buf,
+                           int nb_sectors, int write,
+                           BlockDriverCompletionFunc *cb, void *opaque)
+{
+    int ret;
+    RawAIOCB *acb;
+
+    BLPRINTF("%s ->\n", __FUNCTION__);
+    acb = pa_setup(bs, fd, sector_num, buf, nb_sectors, cb, opaque);
+    if (!acb)
+        return NULL;
+
+    if (write)
+        ret = aio_write(&acb->posix_aiocb);
+    else
+        ret = aio_read(&acb->posix_aiocb);
+
+    if (ret < 0) {
+        /* pa_setup() already queued acb: unlink it before releasing so the
+         * pending list never points at a released request */
+        pa_unlink(posix_aio_state, acb);
+        qemu_aio_release(acb);
+        return NULL;
+    }
+    BLPRINTF("%s <-\n", __FUNCTION__);
+    return acb;
+}
+
+static AIODriver posix_aio_drv = {
+    .name = "posix",
+    .submit = pa_submit,
+    .cancel = pa_cancel,
+    .flush = pa_flush,
+};
+
+/*
+ * Set up the POSIX AIO backend if that has not happened yet and return its
+ * driver table, or NULL if pa_init() failed.
+ */
+AIODriver* posix_aio_init(void)
+{
+    BLPRINTF("%s ->\n", __FUNCTION__);
+    if (pa_init() != 0)
+        return NULL;
+    BLPRINTF("%s <-\n", __FUNCTION__);
+    return &posix_aio_drv;
+}
diff --git a/block-aio.h b/block-aio.h
new file mode 100644
index 0000000..b8597d0
--- /dev/null
+++ b/block-aio.h
@@ -0,0 +1,78 @@
+/*
+ * QEMU Block AIO API
+ *
+ * Copyright IBM, Corp. 2008
+ *
+ * Authors:
+ *  Anthony Liguori
+ *  Ryan Harper
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_BLOCK_AIO_H
+#define QEMU_BLOCK_AIO_H
+
+#include "qemu-common.h"
+#include "block_int.h"
+#include "block.h"
+#include "qemu-aio.h"
+#ifdef CONFIG_AIO
+#include <aio.h>
+#endif
+
+//#define DEBUG_BLOCK_AIO
+#if defined(DEBUG_BLOCK_AIO)
+#define BLPRINTF(formatCstr, args...) do { fprintf(stderr, formatCstr, ##args); fflush(stderr); } while (0)
+#else
+#define BLPRINTF(formatCstr, args...)
+#endif + +typedef struct RawAIOCB { /* one asynchronous request tracked by an aio driver */ + BlockDriverAIOCB common; /* keep first: pa_cancel() casts a BlockDriverAIOCB * back to RawAIOCB * */ + struct aiocb posix_aiocb; /* POSIX control block passed to aio_read()/aio_write()/aio_error() */ + struct RawAIOCB *next; /* next request on the AIOState.first_aio list */ + int ret; /* NOTE(review): not touched by aio-posix.c; presumably used by the synchronous fallback path -- confirm */ +} RawAIOCB; + +typedef struct AIODriver /* table of operations one aio backend provides */ +{ + const char *name; /* backend name; aio-posix.c uses "posix" */ + RawAIOCB *(*submit)(BlockDriverState *bs, int fd, + int64_t sector_num, uint8_t *buf, + int sectors, int write, + BlockDriverCompletionFunc *cb, + void *opaque); /* queue a write (write != 0) or read (write == 0); NULL on failure */ + void (*cancel)(BlockDriverAIOCB *aiocb); /* cancel a request returned by submit */ + int (*flush)(void *opaque); /* posix backend: nonzero while requests are pending */ +} AIODriver; + +typedef struct BDRVRawState { + int fd; + int type; + unsigned int lseek_err_cnt; +#if defined(__linux__) + /* linux floppy specific */ + int fd_open_flags; + int64_t fd_open_time; + int64_t fd_error_time; + int fd_got_error; + int fd_media_changed; +#endif +#if defined(O_DIRECT) + uint8_t* aligned_buf; +#endif + AIODriver *aio_dvr; /* set by raw_open()/hdev_open() from posix_aio_init(); NULL if that failed */ +} BDRVRawState; + +typedef struct AIOState /* state shared by all requests of one backend */ +{ + int fd; /* posix backend: descriptor returned by qemu_signalfd() */ + RawAIOCB *first_aio; /* head of the singly linked list of pending requests */ +} AIOState; + +AIODriver* posix_aio_init(void); + +#endif /* QEMU_BLOCK_AIO_H */ diff --git a/block-raw-posix.c b/block-raw-posix.c index 41f9976..cab7094 100644 --- a/block-raw-posix.c +++ b/block-raw-posix.c @@ -25,11 +25,8 @@ #include "qemu-timer.h" #include "qemu-char.h" #include "block_int.h" -#include "compatfd.h" +#include "block-aio.h" #include -#ifdef CONFIG_AIO -#include -#endif #ifdef CONFIG_COCOA #include @@ -84,25 +81,6 @@ reopen it to see if the disk has been changed */ #define FD_OPEN_TIMEOUT 1000 -typedef struct BDRVRawState { - int fd; - int type; - unsigned int lseek_err_cnt; -#if defined(__linux__) - /* linux floppy specific */ - int fd_open_flags; - int64_t fd_open_time; - int64_t fd_error_time; - int fd_got_error; - int fd_media_changed; -#endif -#if defined(O_DIRECT) - uint8_t* aligned_buf; -#endif -} BDRVRawState; - -static int posix_aio_init(void); - static int fd_open(BlockDriverState *bs); static int raw_open(BlockDriverState *bs, const char *filename, int flags) @@ -110,8 +88,6 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags) BDRVRawState *s = bs->opaque; int fd, 
open_flags, ret; - posix_aio_init(); - s->lseek_err_cnt = 0; open_flags = O_BINARY; @@ -149,6 +125,8 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags) } } #endif + /* init aio driver for this block device */ + s->aio_dvr = posix_aio_init(); return 0; } @@ -429,166 +407,6 @@ static int raw_pwrite(BlockDriverState *bs, int64_t offset, #define raw_pwrite raw_pwrite_aligned #endif - -#ifdef CONFIG_AIO -/***********************************************************/ -/* Unix AIO using POSIX AIO */ - -typedef struct RawAIOCB { - BlockDriverAIOCB common; - struct aiocb aiocb; - struct RawAIOCB *next; - int ret; -} RawAIOCB; - -typedef struct PosixAioState -{ - int fd; - RawAIOCB *first_aio; -} PosixAioState; - -static void posix_aio_read(void *opaque) -{ - PosixAioState *s = opaque; - RawAIOCB *acb, **pacb; - int ret; - size_t offset; - union { - struct qemu_signalfd_siginfo siginfo; - char buf[128]; - } sig; - - /* try to read from signalfd, don't freak out if we can't read anything */ - offset = 0; - while (offset < 128) { - ssize_t len; - - len = read(s->fd, sig.buf + offset, 128 - offset); - if (len == -1 && errno == EINTR) - continue; - if (len == -1 && errno == EAGAIN) { - /* there is no natural reason for this to happen, - * so we'll spin hard until we get everything just - * to be on the safe side. 
*/ - if (offset > 0) - continue; - } - - offset += len; - } - - for(;;) { - pacb = &s->first_aio; - for(;;) { - acb = *pacb; - if (!acb) - goto the_end; - ret = aio_error(&acb->aiocb); - if (ret == ECANCELED) { - /* remove the request */ - *pacb = acb->next; - qemu_aio_release(acb); - } else if (ret != EINPROGRESS) { - /* end of aio */ - if (ret == 0) { - ret = aio_return(&acb->aiocb); - if (ret == acb->aiocb.aio_nbytes) - ret = 0; - else - ret = -EINVAL; - } else { - ret = -ret; - } - /* remove the request */ - *pacb = acb->next; - /* call the callback */ - acb->common.cb(acb->common.opaque, ret); - qemu_aio_release(acb); - break; - } else { - pacb = &acb->next; - } - } - } - the_end: ; -} - -static int posix_aio_flush(void *opaque) -{ - PosixAioState *s = opaque; - return !!s->first_aio; -} - -static PosixAioState *posix_aio_state; - -static int posix_aio_init(void) -{ - sigset_t mask; - PosixAioState *s; - - if (posix_aio_state) - return 0; - - s = qemu_malloc(sizeof(PosixAioState)); - if (s == NULL) - return -ENOMEM; - - /* Make sure to block AIO signal */ - sigemptyset(&mask); - sigaddset(&mask, SIGUSR2); - sigprocmask(SIG_BLOCK, &mask, NULL); - - s->first_aio = NULL; - s->fd = qemu_signalfd(&mask); - - fcntl(s->fd, F_SETFL, O_NONBLOCK); - - qemu_aio_set_fd_handler(s->fd, posix_aio_read, NULL, posix_aio_flush, s); - -#if defined(__GLIBC__) && defined(__linux__) - { - /* XXX: aio thread exit seems to hang on RedHat 9 and this init - seems to fix the problem. 
*/ - struct aioinit ai; - memset(&ai, 0, sizeof(ai)); - ai.aio_threads = 1; - ai.aio_num = 1; - ai.aio_idle_time = 365 * 100000; - aio_init(&ai); - } -#endif - posix_aio_state = s; - - return 0; -} - -static RawAIOCB *raw_aio_setup(BlockDriverState *bs, - int64_t sector_num, uint8_t *buf, int nb_sectors, - BlockDriverCompletionFunc *cb, void *opaque) -{ - BDRVRawState *s = bs->opaque; - RawAIOCB *acb; - - if (fd_open(bs) < 0) - return NULL; - - acb = qemu_aio_get(bs, cb, opaque); - if (!acb) - return NULL; - acb->aiocb.aio_fildes = s->fd; - acb->aiocb.aio_sigevent.sigev_signo = SIGUSR2; - acb->aiocb.aio_sigevent.sigev_notify = SIGEV_SIGNAL; - acb->aiocb.aio_buf = buf; - if (nb_sectors < 0) - acb->aiocb.aio_nbytes = -nb_sectors; - else - acb->aiocb.aio_nbytes = nb_sectors * 512; - acb->aiocb.aio_offset = sector_num * 512; - acb->next = posix_aio_state->first_aio; - posix_aio_state->first_aio = acb; - return acb; -} - static void raw_aio_em_cb(void* opaque) { RawAIOCB *acb = opaque; @@ -601,14 +419,13 @@ static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs, BlockDriverCompletionFunc *cb, void *opaque) { RawAIOCB *acb; + BDRVRawState *s = bs->opaque; /* * If O_DIRECT is used and the buffer is not aligned fall back * to synchronous IO. 
*/ #if defined(O_DIRECT) - BDRVRawState *s = bs->opaque; - if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) { QEMUBH *bh; acb = qemu_aio_get(bs, cb, opaque); @@ -619,13 +436,14 @@ static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs, } #endif - acb = raw_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque); - if (!acb) + if (fd_open(bs) < 0) return NULL; - if (aio_read(&acb->aiocb) < 0) { - qemu_aio_release(acb); + + /* submit read */ + acb = s->aio_dvr->submit(bs, s->fd, sector_num, buf, nb_sectors, 0, cb, + opaque); + if (!acb) return NULL; - } return &acb->common; } @@ -634,13 +452,13 @@ static BlockDriverAIOCB *raw_aio_write(BlockDriverState *bs, BlockDriverCompletionFunc *cb, void *opaque) { RawAIOCB *acb; + BDRVRawState *s = bs->opaque; /* * If O_DIRECT is used and the buffer is not aligned fall back * to synchronous IO. */ #if defined(O_DIRECT) - BDRVRawState *s = bs->opaque; if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) { QEMUBH *bh; @@ -652,48 +470,23 @@ static BlockDriverAIOCB *raw_aio_write(BlockDriverState *bs, } #endif - acb = raw_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque); + /* check the fd as raw_aio_read does; the removed raw_aio_setup() did this for writes too */ + if (fd_open(bs) < 0) + return NULL; + + /* submit write */ + acb = s->aio_dvr->submit(bs, s->fd, sector_num, (uint8_t *)buf, nb_sectors, 1, cb, + opaque); if (!acb) return NULL; - if (aio_write(&acb->aiocb) < 0) { - qemu_aio_release(acb); - return NULL; - } return &acb->common; } static void raw_aio_cancel(BlockDriverAIOCB *blockacb) { - int ret; - RawAIOCB *acb = (RawAIOCB *)blockacb; - RawAIOCB **pacb; - - ret = aio_cancel(acb->aiocb.aio_fildes, &acb->aiocb); - if (ret == AIO_NOTCANCELED) { - /* fail safe: if the aio could not be canceled, we wait for - it */ - while (aio_error(&acb->aiocb) == EINPROGRESS); - } - - /* remove the callback from the queue */ - pacb = &posix_aio_state->first_aio; - for(;;) { - if (*pacb == NULL) { - break; - } else if (*pacb == acb) { - *pacb = acb->next; - qemu_aio_release(acb); - break; - } - pacb = &acb->next; - } -} - -#else /* 
CONFIG_AIO */ -static int posix_aio_init(void) -{ + BDRVRawState *s = blockacb->bs->opaque; + s->aio_dvr->cancel(blockacb); } -#endif /* CONFIG_AIO */ static void raw_close(BlockDriverState *bs) { @@ -898,8 +691,6 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags) BDRVRawState *s = bs->opaque; int fd, open_flags, ret; - posix_aio_init(); - #ifdef CONFIG_COCOA if (strstart(filename, "/dev/cdrom", NULL)) { kern_return_t kernResult; @@ -969,6 +760,8 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags) s->fd_media_changed = 1; } #endif + /* init aio driver for this block device */ + s->aio_dvr = posix_aio_init(); return 0; }