* [Qemu-devel] [PATCH] Implement an fd pool to get real AIO with posix-aio
@ 2008-09-23 20:34 Anthony Liguori
2008-09-24 9:02 ` [Qemu-devel] " Avi Kivity
0 siblings, 1 reply; 3+ messages in thread
From: Anthony Liguori @ 2008-09-23 20:34 UTC (permalink / raw)
To: qemu-devel; +Cc: Anthony Liguori, Ryan Harper, Avi Kivity, kvm
This patch implements a simple fd pool to allow many AIO requests with
posix-aio. The result is significantly improved performance (identical to that
reported for linux-aio) for both cache=on and cache=off.
The fundamental problem with posix-aio is that it limits itself to one thread
per file descriptor. I don't know why this is, but this patch provides a simple
mechanism to work around this (duplicating the file descriptor).
This isn't a great solution, but it seems like a reasonable intermediate step
between posix-aio and a custom thread-pool to replace it.
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
Index: block-raw-posix.c
===================================================================
--- block-raw-posix.c (revision 5304)
+++ block-raw-posix.c (working copy)
@@ -84,10 +84,16 @@
reopen it to see if the disk has been changed */
#define FD_OPEN_TIMEOUT 1000
+/* posix-aio doesn't allow multiple outstanding requests to a single file
+ * descriptor. we implement a pool of dup()'d file descriptors to work
+ * around this */
+#define RAW_FD_POOL_SIZE 16
+
typedef struct BDRVRawState {
int fd;
int type;
unsigned int lseek_err_cnt;
+ int fd_pool[RAW_FD_POOL_SIZE];
#if defined(__linux__)
/* linux floppy specific */
int fd_open_flags;
@@ -109,6 +115,7 @@
{
BDRVRawState *s = bs->opaque;
int fd, open_flags, ret;
+ int i;
posix_aio_init();
@@ -138,6 +145,8 @@
return ret;
}
s->fd = fd;
+ for (i = 0; i < RAW_FD_POOL_SIZE; i++)
+ s->fd_pool[i] = -1;
#if defined(O_DIRECT)
s->aligned_buf = NULL;
if (flags & BDRV_O_DIRECT) {
@@ -436,6 +445,7 @@
typedef struct RawAIOCB {
BlockDriverAIOCB common;
+ int fd;
struct aiocb aiocb;
struct RawAIOCB *next;
int ret;
@@ -447,6 +457,38 @@
RawAIOCB *first_aio;
} PosixAioState;
+static int raw_fd_pool_get(BDRVRawState *s)
+{
+ int i;
+
+ for (i = 0; i < RAW_FD_POOL_SIZE; i++) {
+ /* already in use */
+ if (s->fd_pool[i] != -1)
+ continue;
+
+ /* try to dup file descriptor */
+ s->fd_pool[i] = dup(s->fd);
+ if (s->fd_pool[i] != -1)
+ return s->fd_pool[i];
+ }
+
+ /* we couldn't dup the file descriptor so just use the main one */
+ return s->fd;
+}
+
+static void raw_fd_pool_put(RawAIOCB *acb)
+{
+ BDRVRawState *s = acb->common.bs->opaque;
+ int i;
+
+ for (i = 0; i < RAW_FD_POOL_SIZE; i++) {
+ if (s->fd_pool[i] == acb->fd) {
+ close(s->fd_pool[i]);
+ s->fd_pool[i] = -1;
+ }
+ }
+}
+
static void posix_aio_read(void *opaque)
{
PosixAioState *s = opaque;
@@ -487,6 +529,7 @@
if (ret == ECANCELED) {
/* remove the request */
*pacb = acb->next;
+ raw_fd_pool_put(acb);
qemu_aio_release(acb);
} else if (ret != EINPROGRESS) {
/* end of aio */
@@ -503,6 +546,7 @@
*pacb = acb->next;
/* call the callback */
acb->common.cb(acb->common.opaque, ret);
+ raw_fd_pool_put(acb);
qemu_aio_release(acb);
break;
} else {
@@ -575,7 +619,8 @@
acb = qemu_aio_get(bs, cb, opaque);
if (!acb)
return NULL;
- acb->aiocb.aio_fildes = s->fd;
+ acb->fd = raw_fd_pool_get(s);
+ acb->aiocb.aio_fildes = acb->fd;
acb->aiocb.aio_sigevent.sigev_signo = SIGUSR2;
acb->aiocb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
acb->aiocb.aio_buf = buf;
@@ -682,6 +727,7 @@
break;
} else if (*pacb == acb) {
*pacb = acb->next;
+ raw_fd_pool_put(acb);
qemu_aio_release(acb);
break;
}
@@ -695,6 +741,18 @@
}
#endif /* CONFIG_AIO */
+static void raw_close_fd_pool(BDRVRawState *s)
+{
+ int i;
+
+ for (i = 0; i < RAW_FD_POOL_SIZE; i++) {
+ if (s->fd_pool[i] != -1) {
+ close(s->fd_pool[i]);
+ s->fd_pool[i] = -1;
+ }
+ }
+}
+
static void raw_close(BlockDriverState *bs)
{
BDRVRawState *s = bs->opaque;
@@ -706,6 +764,7 @@
qemu_free(s->aligned_buf);
#endif
}
+ raw_close_fd_pool(s);
}
static int raw_truncate(BlockDriverState *bs, int64_t offset)
@@ -973,7 +1032,6 @@
}
#if defined(__linux__)
-
/* Note: we do not have a reliable method to detect if the floppy is
present. The current method is to try to open the floppy at every
I/O and to keep it opened during a few hundreds of ms. */
@@ -989,6 +1047,7 @@
(qemu_get_clock(rt_clock) - s->fd_open_time) >= FD_OPEN_TIMEOUT) {
close(s->fd);
s->fd = -1;
+ raw_close_fd_pool(s);
#ifdef DEBUG_FLOPPY
printf("Floppy closed\n");
#endif
@@ -1089,6 +1148,7 @@
if (s->fd >= 0) {
close(s->fd);
s->fd = -1;
+ raw_close_fd_pool(s);
}
fd = open(bs->filename, s->fd_open_flags | O_NONBLOCK);
if (fd >= 0) {
^ permalink raw reply [flat|nested] 3+ messages in thread
* [Qemu-devel] Re: [PATCH] Implement an fd pool to get real AIO with posix-aio
2008-09-23 20:34 [Qemu-devel] [PATCH] Implement an fd pool to get real AIO with posix-aio Anthony Liguori
@ 2008-09-24 9:02 ` Avi Kivity
2008-09-24 14:17 ` Anthony Liguori
0 siblings, 1 reply; 3+ messages in thread
From: Avi Kivity @ 2008-09-24 9:02 UTC (permalink / raw)
To: Anthony Liguori; +Cc: Ryan Harper, qemu-devel, kvm
Anthony Liguori wrote:
> This patch implements a simple fd pool to allow many AIO requests with
> posix-aio. The result is significantly improved performance (identical to that
> reported for linux-aio) for both cache=on and cache=off.
>
> The fundamental problem with posix-aio is that it limits itself to one thread
> per file descriptor. I don't know why this is, but this patch provides a simple
> mechanism to work around this (duplicating the file descriptor).
>
> This isn't a great solution, but it seems like a reasonable intermediate step
> between posix-aio and a custom thread-pool to replace it.
>
>
> +static int raw_fd_pool_get(BDRVRawState *s)
> +{
> + int i;
> +
> + for (i = 0; i < RAW_FD_POOL_SIZE; i++) {
> + /* already in use */
> + if (s->fd_pool[i] != -1)
> + continue;
> +
> + /* try to dup file descriptor */
> + s->fd_pool[i] = dup(s->fd);
> + if (s->fd_pool[i] != -1)
> + return s->fd_pool[i];
> + }
> +
> + /* we couldn't dup the file descriptor so just use the main one */
> + return s->fd;
> +}
> +
>
dup()ing the fd on each request is unnecessary work; would be better to
cache the duped fd.
Of course, if this is just a stepping stone, it doesn't matter very much.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 3+ messages in thread
* [Qemu-devel] Re: [PATCH] Implement an fd pool to get real AIO with posix-aio
2008-09-24 9:02 ` [Qemu-devel] " Avi Kivity
@ 2008-09-24 14:17 ` Anthony Liguori
0 siblings, 0 replies; 3+ messages in thread
From: Anthony Liguori @ 2008-09-24 14:17 UTC (permalink / raw)
To: Avi Kivity; +Cc: Ryan Harper, qemu-devel, kvm
Avi Kivity wrote:
> Anthony Liguori wrote:
>
> dup()ing the fd on each request is unnecessary work; would be better
> to cache the duped fd.
Yeah, I was concerned about this too. Ryan reran the fio benchmark and
the submission latency and completion latency were identical to those of
the linux-aio patches. That suggests that the overhead of dup() is lost in
the noise.
Since this is simpler and keeps the number of open file descriptors as
low as possible, I was happy about that.
Regards,
Anthony Liguori
> Of course, if this is just a stepping stone, it doesn't matter very much.
>
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2008-09-24 14:18 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-09-23 20:34 [Qemu-devel] [PATCH] Implement an fd pool to get real AIO with posix-aio Anthony Liguori
2008-09-24 9:02 ` [Qemu-devel] " Avi Kivity
2008-09-24 14:17 ` Anthony Liguori
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).