* [Qemu-devel] [PATCH][RFC] Refactor AIO to allow multiple AIO implementations
@ 2008-09-10 15:49 Anthony Liguori
2008-09-11 7:48 ` [Qemu-devel] " Gerd Hoffmann
0 siblings, 1 reply; 7+ messages in thread
From: Anthony Liguori @ 2008-09-10 15:49 UTC (permalink / raw)
To: qemu-devel; +Cc: Anthony Liguori, Ryan Harper, kvm
This patch refactors the AIO layer to allow multiple AIO implementations. It's
only possible because of the recent signalfd() patch.
Right now, the AIO infrastructure is pretty specific to the block raw backend.
For other block devices to implement AIO, the qemu_aio_wait function must support
registration. This patch introduces a new function, qemu_aio_set_fd_handler, which
can be used to register a file descriptor to be called back. qemu_aio_wait() now
polls a set of file descriptors registered with this function until one becomes
readable or writable.
This patch should allow the implementation of alternative AIO backends (via a
thread pool or linux-aio) and AIO backends in non-traditional block devices (like NBD).
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
diff --git a/Makefile b/Makefile
index e676900..e8d4594 100644
--- a/Makefile
+++ b/Makefile
@@ -173,7 +173,7 @@ libqemu_user.a: $(USER_OBJS)
rm -f $@
$(AR) rcs $@ $(USER_OBJS)
-QEMU_IMG_BLOCK_OBJS = $(BLOCK_OBJS)
+QEMU_IMG_BLOCK_OBJS = $(BLOCK_OBJS) qemu-img-aio.o
ifdef CONFIG_WIN32
QEMU_IMG_BLOCK_OBJS += qemu-img-block-raw-win32.o
else
@@ -194,7 +194,7 @@ qemu-img-%.o: %.c
qemu-nbd-%.o: %.c
$(CC) $(CFLAGS) $(CPPFLAGS) -DQEMU_NBD -c -o $@ $<
-qemu-nbd$(EXESUF): qemu-nbd.o qemu-nbd-nbd.o qemu-img-block.o \
+qemu-nbd$(EXESUF): qemu-nbd.o qemu-nbd-nbd.o qemu-img-block.o qemu-img-aio.o \
osdep.o qemu-nbd-block-raw-posix.o compatfd.o $(BLOCK_OBJS)
$(CC) $(LDFLAGS) -o $@ $^ -lz $(LIBS)
diff --git a/Makefile.target b/Makefile.target
index 27a9eb6..3932227 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -472,7 +472,7 @@ endif #CONFIG_DARWIN_USER
# System emulator target
ifndef CONFIG_USER_ONLY
-OBJS=vl.o osdep.o monitor.o pci.o loader.o isa_mmio.o machine.o net-checksum.o
+OBJS=vl.o osdep.o monitor.o pci.o loader.o isa_mmio.o machine.o net-checksum.o aio.o
ifdef CONFIG_WIN32
OBJS+=block-raw-win32.o
else
diff --git a/aio.c b/aio.c
new file mode 100644
index 0000000..74f875c
--- /dev/null
+++ b/aio.c
@@ -0,0 +1,199 @@
+/*
+ * QEMU aio implementation
+ *
+ * Copyright IBM, Corp. 2008
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu-common.h"
+#include "block.h"
+#include "sys-queue.h"
+
+typedef struct AioHandler AioHandler;
+
+/* The list of registered AIO handlers */
+static LIST_HEAD(, AioHandler) aio_handlers;
+
+/* This is a simple lock used to protect the aio_handlers list. Specifically,
+ * it's used to ensure that no callbacks are removed while we're walking and
+ * dispatching callbacks.
+ */
+static int walking_handlers;
+
+struct AioHandler
+{
+ int fd;
+ IOHandler *io_read;
+ IOHandler *io_write;
+ AioFlushHandler *io_flush;
+ int deleted;
+ void *opaque;
+ LIST_ENTRY(AioHandler) node;
+};
+
+static AioHandler *find_aio_handler(int fd)
+{
+ AioHandler *node;
+
+ LIST_FOREACH(node, &aio_handlers, node) {
+ if (node->fd == fd)
+ return node;
+ }
+
+ return NULL;
+}
+
+int qemu_aio_set_fd_handler(int fd,
+ IOHandler *io_read,
+ IOHandler *io_write,
+ AioFlushHandler *io_flush,
+ void *opaque)
+{
+ AioHandler *node;
+
+ node = find_aio_handler(fd);
+
+ /* Are we deleting the fd handler? */
+ if (!io_read && !io_write) {
+ if (node) {
+ /* If the lock is held, just mark the node as deleted */
+ if (walking_handlers)
+ node->deleted = 1;
+ else {
+ /* Otherwise, delete it for real. We can't just mark it as
+ * deleted because deleted nodes are only cleaned up after
+ * releasing the walking_handlers lock.
+ */
+ LIST_REMOVE(node, node);
+ qemu_free(node);
+ }
+ }
+ } else {
+ if (node == NULL) {
+ /* Alloc and insert if it's not already there */
+ node = qemu_mallocz(sizeof(AioHandler));
+ if (node == NULL)
+ return -ENOMEM;
+ node->fd = fd;
+ LIST_INSERT_HEAD(&aio_handlers, node, node);
+ }
+ /* Update handler with latest information */
+ node->io_read = io_read;
+ node->io_write = io_write;
+ node->io_flush = io_flush;
+ node->opaque = opaque;
+ }
+
+ /* Normally, we use the normal fd_handler to dispatch IO events. However,
+ * this isn't available with QEMU_IMG or QEMU_NBD so the callbacks will
+ * only be invoked from qemu_aio_wait/qemu_aio_flush
+ */
+#if !defined(QEMU_IMG) && !defined(QEMU_NBD)
+ qemu_set_fd_handler2(fd, NULL, io_read, io_write, opaque);
+#endif
+
+ return 0;
+}
+
+void qemu_aio_flush(void)
+{
+ AioHandler *node;
+ int ret;
+
+ do {
+ ret = 0;
+
+ LIST_FOREACH(node, &aio_handlers, node) {
+ ret |= node->io_flush(node->opaque);
+ }
+
+ qemu_aio_wait();
+ } while (ret > 0);
+}
+
+void qemu_aio_wait(void)
+{
+ int ret;
+
+#if !defined(QEMU_IMG) && !defined(QEMU_NBD)
+ if (qemu_bh_poll())
+ return;
+#endif
+
+ do {
+ AioHandler *node;
+ fd_set rdfds, wrfds;
+ int max_fd = -1;
+
+ walking_handlers = 1;
+
+ /* fill fd sets */
+ LIST_FOREACH(node, &aio_handlers, node) {
+ /* If there aren't pending AIO operations, don't invoke callbacks.
+ * Otherwise, if there are no AIO requests, qemu_aio_wait() would
+ * wait indefinitely.
+ */
+ if (node->io_flush && node->io_flush(node->opaque) == 0)
+ continue;
+
+ if (!node->deleted && node->io_read) {
+ FD_SET(node->fd, &rdfds);
+ max_fd = MAX(max_fd, node->fd + 1);
+ }
+ if (!node->deleted && node->io_write) {
+ FD_SET(node->fd, &wrfds);
+ max_fd = MAX(max_fd, node->fd + 1);
+ }
+ }
+
+ walking_handlers = 0;
+
+ /* No AIO operations? Get us out of here */
+ if (max_fd == -1)
+ break;
+
+ /* wait until next event */
+ ret = select(max_fd, &rdfds, &wrfds, NULL, NULL);
+ if (ret == -1 && errno == EINTR)
+ continue;
+
+ /* if we have any readable fds, dispatch event */
+ if (ret > 0) {
+ walking_handlers = 1;
+
+ /* we have to walk very carefully in case
+ * qemu_aio_set_fd_handler is called while we're walking */
+ node = LIST_FIRST(&aio_handlers);
+ while (node) {
+ AioHandler *tmp;
+
+ if (!node->deleted &&
+ FD_ISSET(node->fd, &rdfds) &&
+ node->io_read) {
+ node->io_read(node->opaque);
+ }
+ if (!node->deleted &&
+ FD_ISSET(node->fd, &wrfds) &&
+ node->io_write) {
+ node->io_write(node->opaque);
+ }
+
+ tmp = node;
+ node = LIST_NEXT(node, node);
+
+ if (tmp->deleted) {
+ LIST_REMOVE(tmp, node);
+ qemu_free(tmp);
+ }
+ }
+
+ walking_handlers = 0;
+ }
+ } while (ret == 0);
+}
diff --git a/block-raw-posix.c b/block-raw-posix.c
index d5a4514..52ee49e 100644
--- a/block-raw-posix.c
+++ b/block-raw-posix.c
@@ -103,6 +103,8 @@ typedef struct BDRVRawState {
#endif
} BDRVRawState;
+static int posix_aio_init(void);
+
static int fd_open(BlockDriverState *bs);
static int raw_open(BlockDriverState *bs, const char *filename, int flags)
@@ -110,6 +112,8 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)
BDRVRawState *s = bs->opaque;
int fd, open_flags, ret;
+ posix_aio_init();
+
s->lseek_err_cnt = 0;
open_flags = O_BINARY;
@@ -439,18 +443,20 @@ typedef struct RawAIOCB {
int ret;
} RawAIOCB;
-static int aio_sig_fd = -1;
-static int aio_sig_num = SIGUSR2;
-static RawAIOCB *first_aio; /* AIO issued */
-static int aio_initialized = 0;
+typedef struct PosixAioState
+{
+ int fd;
+ RawAIOCB *first_aio;
+} PosixAioState;
-static void qemu_aio_poll(void *opaque)
+static void posix_aio_read(void *opaque)
{
+ PosixAioState *s = opaque;
RawAIOCB *acb, **pacb;
int ret;
for(;;) {
- pacb = &first_aio;
+ pacb = &s->first_aio;
for(;;) {
acb = *pacb;
if (!acb)
@@ -485,21 +491,35 @@ static void qemu_aio_poll(void *opaque)
the_end: ;
}
-void qemu_aio_init(void)
+static int posix_aio_flush(void *opaque)
+{
+ PosixAioState *s = opaque;
+ return !s->first_aio;
+}
+
+static PosixAioState *posix_aio_state;
+
+static int posix_aio_init(void)
{
sigset_t mask;
+ PosixAioState *s;
+
+ if (posix_aio_state)
+ return 0;
- aio_initialized = 1;
+ s = qemu_malloc(sizeof(PosixAioState));
+ if (s == NULL)
+ return -ENOMEM;
/* Make sure to block AIO signal */
sigemptyset(&mask);
- sigaddset(&mask, aio_sig_num);
+ sigaddset(&mask, SIGUSR2);
sigprocmask(SIG_BLOCK, &mask, NULL);
- aio_sig_fd = qemu_signalfd(&mask);
-#if !defined(QEMU_IMG) && !defined(QEMU_NBD)
- qemu_set_fd_handler2(aio_sig_fd, NULL, qemu_aio_poll, NULL, NULL);
-#endif
+ s->first_aio = NULL;
+ s->fd = qemu_signalfd(&mask);
+
+ qemu_aio_set_fd_handler(s->fd, posix_aio_read, NULL, posix_aio_flush, s);
#if defined(__GLIBC__) && defined(__linux__)
{
@@ -513,38 +533,10 @@ void qemu_aio_init(void)
aio_init(&ai);
}
#endif
-}
-/* Wait for all IO requests to complete. */
-void qemu_aio_flush(void)
-{
- qemu_aio_poll(NULL);
- while (first_aio) {
- qemu_aio_wait();
- }
-}
+ posix_aio_state = s;
-void qemu_aio_wait(void)
-{
- int ret;
-
-#if !defined(QEMU_IMG) && !defined(QEMU_NBD)
- if (qemu_bh_poll())
- return;
-#endif
-
- do {
- fd_set rdfds;
-
- FD_ZERO(&rdfds);
- FD_SET(aio_sig_fd, &rdfds);
-
- ret = select(aio_sig_fd + 1, &rdfds, NULL, NULL, NULL);
- if (ret == -1 && errno == EINTR)
- continue;
- } while (ret == 0);
-
- qemu_aio_poll(NULL);
+ return 0;
}
static RawAIOCB *raw_aio_setup(BlockDriverState *bs,
@@ -561,7 +553,7 @@ static RawAIOCB *raw_aio_setup(BlockDriverState *bs,
if (!acb)
return NULL;
acb->aiocb.aio_fildes = s->fd;
- acb->aiocb.aio_sigevent.sigev_signo = aio_sig_num;
+ acb->aiocb.aio_sigevent.sigev_signo = SIGUSR2;
acb->aiocb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
acb->aiocb.aio_buf = buf;
if (nb_sectors < 0)
@@ -569,8 +561,8 @@ static RawAIOCB *raw_aio_setup(BlockDriverState *bs,
else
acb->aiocb.aio_nbytes = nb_sectors * 512;
acb->aiocb.aio_offset = sector_num * 512;
- acb->next = first_aio;
- first_aio = acb;
+ acb->next = posix_aio_state->first_aio;
+ posix_aio_state->first_aio = acb;
return acb;
}
@@ -663,7 +655,7 @@ static void raw_aio_cancel(BlockDriverAIOCB *blockacb)
}
/* remove the callback from the queue */
- pacb = &first_aio;
+ pacb = &posix_aio_state->first_aio;
for(;;) {
if (*pacb == NULL) {
break;
@@ -678,19 +670,9 @@ static void raw_aio_cancel(BlockDriverAIOCB *blockacb)
# else /* CONFIG_AIO */
-void qemu_aio_init(void)
-{
-}
-
-void qemu_aio_flush(void)
-{
-}
-
-void qemu_aio_wait(void)
+static int posix_aio_init(void)
{
-#if !defined(QEMU_IMG) && !defined(QEMU_NBD)
- qemu_bh_poll();
-#endif
+ return 0;
}
#endif /* CONFIG_AIO */
@@ -899,6 +881,8 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
BDRVRawState *s = bs->opaque;
int fd, open_flags, ret;
+ posix_aio_init();
+
#ifdef CONFIG_COCOA
if (strstart(filename, "/dev/cdrom", NULL)) {
kern_return_t kernResult;
diff --git a/block.h b/block.h
index 0443585..5064d37 100644
--- a/block.h
+++ b/block.h
@@ -1,6 +1,8 @@
#ifndef BLOCK_H
#define BLOCK_H
+#include "qemu-aio.h"
+
/* block.c */
typedef struct BlockDriver BlockDriver;
@@ -89,10 +91,6 @@ BlockDriverAIOCB *bdrv_aio_write(BlockDriverState *bs, int64_t sector_num,
BlockDriverCompletionFunc *cb, void *opaque);
void bdrv_aio_cancel(BlockDriverAIOCB *acb);
-void qemu_aio_init(void);
-void qemu_aio_flush(void);
-void qemu_aio_wait(void);
-
int qemu_key_check(BlockDriverState *bs, const char *name);
/* Ensure contents are flushed to disk. */
diff --git a/qemu-aio.h b/qemu-aio.h
new file mode 100644
index 0000000..7967829
--- /dev/null
+++ b/qemu-aio.h
@@ -0,0 +1,45 @@
+/*
+ * QEMU aio implementation
+ *
+ * Copyright IBM, Corp. 2008
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_AIO_H
+#define QEMU_AIO_H
+
+#include "qemu-common.h"
+#include "qemu-char.h"
+
+/* Returns 1 if there are still outstanding AIO requests; 0 otherwise */
+typedef int (AioFlushHandler)(void *opaque);
+
+/* Flush any pending AIO operation. This function will block until all
+ * outstanding AIO operations have been completed or cancelled. */
+void qemu_aio_flush(void);
+
+/* Wait for a single AIO completion to occur. This function will block until a
+ * single AIO operation has completed. It is intended to be used as a looping
+ * primitive when simulating synchronous IO based on asynchronous IO. */
+void qemu_aio_wait(void);
+
+/* Register a file descriptor and associated callbacks. Behaves very similarly
+ * to qemu_set_fd_handler2. Unlike qemu_set_fd_handler2, these callbacks will
+ * be invoked when using either qemu_aio_wait() or qemu_aio_flush().
+ *
+ * Code that invokes AIO completion functions should rely on this function
+ * instead of qemu_set_fd_handler[2].
+ */
+int qemu_aio_set_fd_handler(int fd,
+ IOHandler *io_read,
+ IOHandler *io_write,
+ AioFlushHandler *io_flush,
+ void *opaque);
+
+#endif
diff --git a/vl.c b/vl.c
index 6f51d53..3124b3f 100644
--- a/vl.c
+++ b/vl.c
@@ -8948,7 +8948,6 @@ int main(int argc, char **argv)
init_timers();
init_timer_alarm();
- qemu_aio_init();
if (use_icount && icount_time_shift < 0) {
use_icount = 2;
/* 125MIPS seems a reasonable initial guess at the guest speed.
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [Qemu-devel] Re: [PATCH][RFC] Refactor AIO to allow multiple AIO implementations
2008-09-10 15:49 [Qemu-devel] [PATCH][RFC] Refactor AIO to allow multiple AIO implementations Anthony Liguori
@ 2008-09-11 7:48 ` Gerd Hoffmann
2008-09-11 12:45 ` Anthony Liguori
0 siblings, 1 reply; 7+ messages in thread
From: Gerd Hoffmann @ 2008-09-11 7:48 UTC (permalink / raw)
To: Anthony Liguori; +Cc: Ryan Harper, qemu-devel, kvm
Anthony Liguori wrote:
> This patch refactors the AIO layer to allow multiple AIO implementations. It's
> only possible because of the recent signalfd() patch.
>
> Right now, the AIO infrastructure is pretty specific to the block raw backend.
> For other block devices to implement AIO, the qemu_aio_wait function must support
> registration. This patch introduces a new function, qemu_aio_set_fd_handler, which
> can be used to register a file descriptor to be called back. qemu_aio_wait() now
> polls a set of file descriptors registered with this function until one becomes
> readable or writable.
Hmm, what is the long-term plan for this? Separating out the completion
notification is a nice first step. Most of the aio infrastructure is
still in the block raw backend though. IMHO it doesn't belong there.
The aio implementation(s) should be a separate entity usable by all
block backends to invoke aio requests, without having to care about how
this is actually implemented (threads, linux aio, posix aio, whatever).
cheers,
Gerd
^ permalink raw reply [flat|nested] 7+ messages in thread
* [Qemu-devel] Re: [PATCH][RFC] Refactor AIO to allow multiple AIO implementations
2008-09-11 7:48 ` [Qemu-devel] " Gerd Hoffmann
@ 2008-09-11 12:45 ` Anthony Liguori
2008-09-11 13:15 ` Gerd Hoffmann
0 siblings, 1 reply; 7+ messages in thread
From: Anthony Liguori @ 2008-09-11 12:45 UTC (permalink / raw)
To: Gerd Hoffmann; +Cc: Ryan Harper, qemu-devel, kvm
Gerd Hoffmann wrote:
> Anthony Liguori wrote:
>
>> This patch refactors the AIO layer to allow multiple AIO implementations. It's
>> only possible because of the recent signalfd() patch.
>>
>> Right now, the AIO infrastructure is pretty specific to the block raw backend.
>> For other block devices to implement AIO, the qemu_aio_wait function must support
>> registration. This patch introduces a new function, qemu_aio_set_fd_handler, which
>> can be used to register a file descriptor to be called back. qemu_aio_wait() now
>> polls a set of file descriptors registered with this function until one becomes
>> readable or writable.
>>
>
> Hmm, what is the long-term plan for this?
Step one is to move the generic aio bits out of block-raw-posix (which
this patch does).
Step two is to move the posix-aio routines out of block-raw-posix.
Step three would be to add a generic interface to allow block-raw-posix
to use multiple aio implementations
Step four would be to add a new aio implementation (I think the best
route is a thread-pool based implementation).
Regards,
Anthony Liguori
> Separating out the completion
> notification is a nice first step. Most of the aio infrastructure is
> still in the block raw backend though. IMHO it doesn't belong there.
>
> The aio implementation(s) should be a separate entity usable by all
> block backends to invoke aio requests, without having to care about how
> this is actually implemented (threads, linux aio, posix aio, whatever).
>
> cheers,
> Gerd
>
>
^ permalink raw reply [flat|nested] 7+ messages in thread
* [Qemu-devel] Re: [PATCH][RFC] Refactor AIO to allow multiple AIO implementations
2008-09-11 12:45 ` Anthony Liguori
@ 2008-09-11 13:15 ` Gerd Hoffmann
2008-09-11 13:28 ` Jamie Lokier
2008-09-11 14:04 ` Anthony Liguori
0 siblings, 2 replies; 7+ messages in thread
From: Gerd Hoffmann @ 2008-09-11 13:15 UTC (permalink / raw)
To: Anthony Liguori; +Cc: Ryan Harper, qemu-devel, kvm
Anthony Liguori wrote:
> Gerd Hoffmann wrote:
>> Hmm, what is the long-term plan for this?
>
> Step one is to move the generic aio bits out of block-raw-posix (which
> this patch does).
>
> Step two is to move the posix-aio routines out of block-raw-posix.
>
> Step three would be to add a generic interface to allow block-raw-posix
> to use multiple aio implementations
>
> Step four would be to add a new aio implementation
Sounds good.
> (I think the best
> route is a thread-pool based implementation).
Not sure about that. linux-aio would have the advantage that the kernel
knows about all the requests in flight and probably can do a better job
on I/O ordering and scheduling then. But once we can have multiple
different implementations we can just try ;)
cheers,
Gerd
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [Qemu-devel] Re: [PATCH][RFC] Refactor AIO to allow multiple AIO implementations
2008-09-11 13:15 ` Gerd Hoffmann
@ 2008-09-11 13:28 ` Jamie Lokier
2008-09-11 14:09 ` Anthony Liguori
2008-09-11 14:04 ` Anthony Liguori
1 sibling, 1 reply; 7+ messages in thread
From: Jamie Lokier @ 2008-09-11 13:28 UTC (permalink / raw)
To: qemu-devel; +Cc: Anthony Liguori, Ryan Harper, kvm
Gerd Hoffmann wrote:
> > (I think the best
> > route is a thread-pool based implementation).
>
> Not sure about that. linux-aio would have the advantage that the kernel
> knows about all the requests in flight and probably can do a better job
> on I/O ordering and scheduling then. But once we can have multiple
> different implementations we can just try ;)
Won't posix-aio give the same info to the kernel when used with a
sufficiently avant-garde Linux distro?
I'm under the impression that linux-aio is better in every way, as
I think Anthony Liguori posted a while back:
>>> Threads are a poor substitute for a proper AIO interface.
>>> linux-aio gives you everything you could possibly want in an
>>> interface since it allows you to submit multiple vectored operations
>>> in a single syscall, use an fd to signal request completion,
>>> complete multiple requests in a single syscall, and inject barriers
>>> via fdsync.
But knowing about request in flight, I/O ordering etc. seem equally
available via posix-aio on a distro where that calls linux-aio
(i.e. not the Glibc implementation).
-- Jamie
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [Qemu-devel] Re: [PATCH][RFC] Refactor AIO to allow multiple AIO implementations
2008-09-11 13:28 ` Jamie Lokier
@ 2008-09-11 14:09 ` Anthony Liguori
0 siblings, 0 replies; 7+ messages in thread
From: Anthony Liguori @ 2008-09-11 14:09 UTC (permalink / raw)
To: Jamie Lokier; +Cc: Ryan Harper, qemu-devel, kvm
Jamie Lokier wrote:
> I'm under the impression that linux-aio is better in every way, as
> I think Anthony Liguori posted a while back:
>
>
>>>> Threads are a poor substitute for a proper AIO interface.
>>>> linux-aio gives you everything you could possibly want in an
>>>> interface since it allows you to submit multiple vectored operations
>>>> in a single syscall, use an fd to signal request completion,
>>>> complete multiple requests in a single syscall, and inject barriers
>>>> via fdsync.
>>>>
Well that Anthony Liguori guy is obviously smoking crack.
The problem with linux-aio is that it doesn't work unless you open a
file with O_DIRECT. There is no simple way to make it work with
buffered files either because of the silliness of the vfs interface.
I know buffered vs. unbuffered IO is a source of contention for some but
if you're doing largely homogeneous virtualization, and you happen to
have common storage (either via a common backing file or something more
sophisticated), buffered IO is going to make a big difference.
There are at least three thread pool implementations in the kernel to
allow asynchronous IO to buffered files. If the kernel is doing this
internally, userspace has no hope of being able to do it differently.
BTW, my current thinking for a thread pool implementation would actually
use linux-aio in each thread, but using it to emulate preadv/pwritev.
Regards,
Anthony Liguori
> But knowing about request in flight, I/O ordering etc. seem equally
> available via posix-aio on a distro where that calls linux-aio
> (i.e. not the Glibc implementation).
>
> -- Jamie
>
^ permalink raw reply [flat|nested] 7+ messages in thread
* [Qemu-devel] Re: [PATCH][RFC] Refactor AIO to allow multiple AIO implementations
2008-09-11 13:15 ` Gerd Hoffmann
2008-09-11 13:28 ` Jamie Lokier
@ 2008-09-11 14:04 ` Anthony Liguori
1 sibling, 0 replies; 7+ messages in thread
From: Anthony Liguori @ 2008-09-11 14:04 UTC (permalink / raw)
To: Gerd Hoffmann; +Cc: Ryan Harper, qemu-devel, kvm
Gerd Hoffmann wrote:
> Anthony Liguori wrote:
>
>> Gerd Hoffmann wrote:
>>
>>> Hmm, what is the long-term plan for this?
>>>
>> Step one is to move the generic aio bits out of block-raw-posix (which
>> this patch does).
>>
>> Step two is to move the posix-aio routines out of block-raw-posix.
>>
>> Step three would be to add a generic interface to allow block-raw-posix
>> to use multiple aio implementations
>>
>> Step four would be to add a new aio implementation
>>
>
> Sounds good.
>
>
>> (I think the best
>> route is a thread-pool based implementation).
>>
>
> Not sure about that. linux-aio would have the advantage that the kernel
> knows about all the requests in flight and probably can do a better job
> on I/O ordering and scheduling then. But once we can have multiple
> different implementations we can just try ;)
>
But linux-aio doesn't work unless you're using O_DIRECT so for a lot of
users, it's not very helpful.
Regards,
Anthony Liguori
> cheers,
> Gerd
>
>
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2008-09-11 14:10 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-09-10 15:49 [Qemu-devel] [PATCH][RFC] Refactor AIO to allow multiple AIO implementations Anthony Liguori
2008-09-11 7:48 ` [Qemu-devel] " Gerd Hoffmann
2008-09-11 12:45 ` Anthony Liguori
2008-09-11 13:15 ` Gerd Hoffmann
2008-09-11 13:28 ` Jamie Lokier
2008-09-11 14:09 ` Anthony Liguori
2008-09-11 14:04 ` Anthony Liguori
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).