qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Anthony Liguori <aliguori@us.ibm.com>
To: qemu-devel@nongnu.org
Cc: kvm-devel@lists.sourceforge.net,
	Anthony Liguori <aliguori@us.ibm.com>,
	Marcelo Tosatti <mtosatti@redhat.com>
Subject: [Qemu-devel] [PATCH 3/3] Implement linux-aio backend
Date: Thu, 17 Apr 2008 14:26:52 -0500	[thread overview]
Message-ID: <1208460412-27567-3-git-send-email-aliguori@us.ibm.com> (raw)
In-Reply-To: <1208460412-27567-1-git-send-email-aliguori@us.ibm.com>

This patch introduces a Linux-aio backend that is disabled by default.  To
use this backend effectively, the user should disable caching and select
it with the appropriate -aio option.  For instance:

qemu-system-x86_64 -drive file=foo.img,cache=off -aio linux

There's no universal way to wait asynchronously for completions with linux-aio.
At some point, signals were added to signal completion.  More recently, an
eventfd interface was added.  This patch relies on the latter.

We try hard to detect whether the right support is available in configure to
avoid compile failures.

Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>

diff --git a/Makefile.target b/Makefile.target
index f635d68..289887c 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -487,6 +487,9 @@ OBJS+=block-raw-win32.o
 else
 OBJS+=block-raw-posix.o aio-posix.o
 endif
+ifdef CONFIG_LINUX_AIO
+OBJS+=aio-linux.o
+endif
 
 LIBS+=-lz
 ifdef CONFIG_ALSA
diff --git a/aio-linux.c b/aio-linux.c
new file mode 100644
index 0000000..f5c222b
--- /dev/null
+++ b/aio-linux.c
@@ -0,0 +1,210 @@
+/*
+ * QEMU Linux AIO Support
+ *
+ * Copyright IBM, Corp. 2008
+ *
+ * Authors:
+ *  Anthony Liguori   <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu-common.h"
+#include "qemu-char.h"
+#include "block.h"
+#include "block_int.h"
+#include "block-aio.h"
+#include "sysemu.h"
+
+#include <sys/types.h>
+#include <sys/syscall.h>
+#include <linux/aio_abi.h>
+
+int eventfd(unsigned int initval)
+{   /* Raw SYS_eventfd syscall; la_init() treats a -1 return as failure. */
+    return syscall(SYS_eventfd, initval);
+}
+
+int io_setup(unsigned nr_reqs, aio_context_t *ctx_id)
+{   /* Raw SYS_io_setup syscall; la_init() uses it to fill in aio_ctxt_id. */
+    return syscall(SYS_io_setup, nr_reqs, ctx_id);
+}
+
+int io_destroy(aio_context_t ctx_id)
+{   /* Raw SYS_io_destroy syscall on ctx_id; not called within this file. */
+    return syscall(SYS_io_destroy, ctx_id);
+}
+
+int io_getevents(aio_context_t ctx_id, long min_nr, long nr,
+		 struct io_event *events, struct timespec *timeout)
+{   /* Raw SYS_io_getevents syscall; la_completion() reaps events with it. */
+    return syscall(SYS_io_getevents, ctx_id, min_nr, nr, events, timeout);
+}
+
+int io_submit(aio_context_t ctx_id, long nr, struct iocb **iocb)
+{   /* Raw SYS_io_submit syscall; la_submit() expects it to return 1. */
+    return syscall(SYS_io_submit, ctx_id, nr, iocb);
+}
+
+int io_cancel(aio_context_t ctx_id, struct iocb *iocb, struct io_event *result)
+{   /* Raw SYS_io_cancel syscall; called from la_cancel(). */
+    return syscall(SYS_io_cancel, ctx_id, iocb, result);
+}
+
+typedef struct LinuxAIOCB {	/* per-request state of the linux-aio driver */
+    BlockDriverAIOCB common;	/* kept first: la_cancel() casts a pointer to it */
+    struct iocb iocb;		/* control block la_submit() passes to io_submit() */
+} LinuxAIOCB;
+
+static int aio_efd;			/* eventfd set as aio_resfd in each iocb */
+static aio_context_t aio_ctxt_id;	/* AIO context filled in by la_init() */
+static int outstanding_requests;	/* submitted, not yet completed/cancelled */
+
+/* Queue one async transfer of nb_sectors 512-byte sectors at sector_num on fd.
+ * Returns the new AIOCB, or NULL if none could be obtained; exits on failure. */
+static BlockDriverAIOCB *la_submit(BlockDriverState *bs,
+				   int fd, int64_t sector_num,
+				   void *buf, int nb_sectors, int write,
+				   BlockDriverCompletionFunc *cb, void *opaque)
+{
+    LinuxAIOCB *aiocb;
+    struct iocb *iocbs[1];
+    int err;
+
+    aiocb = qemu_aio_get(bs, cb, opaque);
+    if (!aiocb)
+	return NULL;
+
+    /* Start from an all-zero iocb so no stale field reaches the kernel. */
+    aiocb->iocb = (struct iocb){ 0 };
+    aiocb->iocb.aio_lio_opcode = write ? IOCB_CMD_PWRITE : IOCB_CMD_PREAD;
+    aiocb->iocb.aio_data = (unsigned long)aiocb;
+    aiocb->iocb.aio_fildes = fd;
+    /* Ask the kernel to signal aio_efd when this request completes. */
+    aiocb->iocb.aio_flags = IOCB_FLAG_RESFD;
+    aiocb->iocb.aio_resfd = aio_efd;
+    aiocb->iocb.aio_buf = (unsigned long)buf;
+    /* Widen before scaling so a large sector count cannot overflow int. */
+    aiocb->iocb.aio_nbytes = (uint64_t)nb_sectors * 512;
+    aiocb->iocb.aio_offset = sector_num * 512;
+
+    iocbs[0] = &aiocb->iocb;
+
+    /* Exactly one iocb is submitted; retry if a signal interrupts the call. */
+    do {
+	err = io_submit(aio_ctxt_id, 1, iocbs);
+    } while (err == -1 && errno == EINTR);
+
+    if (err != 1) {
+	fprintf(stderr, "failed to submit aio request: %m\n");
+	exit(1);
+    }
+
+    outstanding_requests++;
+
+    return &aiocb->common;
+}
+
+static void la_wait(void)
+{   /* One main_loop_wait(10) pass; presumably lets aio_efd's handler run. */
+    main_loop_wait(10);
+}
+
+static void la_flush(void)
+{   /* Keep waiting until outstanding_requests has dropped to zero. */
+    while (outstanding_requests)
+	la_wait();
+}
+
+static void la_cancel(BlockDriverAIOCB *baiocb)
+{   /* Cancel baiocb, or wait it out when the kernel will not cancel it. */
+    LinuxAIOCB *aiocb = (void *)baiocb;
+    struct io_event result;
+    int err;
+
+    do {
+	err = io_cancel(aio_ctxt_id, &aiocb->iocb, &result);
+    } while (err == -1 && errno == EINTR);
+
+    if (err == 0) {
+	outstanding_requests--;
+	qemu_aio_release(aiocb);
+    } else
+	la_flush();	/* still in flight: la_completion() must release it */
+}
+
+static void la_completion(void *opaque)
+{   /* Read handler for aio_efd: reap finished requests, run their callbacks. */
+    struct io_event events[256];
+    struct timespec ts = {0, 0};
+    uint64_t count;
+    int i, ret, want;
+
+    do {
+	ret = read(aio_efd, &count, sizeof(count));
+    } while (ret == -1 && errno == EINTR);
+
+    if (ret != (int)sizeof(count)) {
+	fprintf(stderr, "bad read from eventfd\n");
+	exit(1);
+    }
+
+    /* count is the number of completions signalled since the last read.  Reap
+     * exactly that many, in batches that fit events[]: taking extra events
+     * would leave a later counter value with nothing left to collect. */
+    for (; count > 0; count -= ret) {
+	want = ARRAY_SIZE(events);
+	if (count < ARRAY_SIZE(events))
+	    want = count;
+	do {
+	    ret = io_getevents(aio_ctxt_id, want, want, events, &ts);
+	} while (ret == -1 && errno == EINTR);
+
+	if (ret != want) {	/* short reap, or -1 on error */
+	    fprintf(stderr, "io_getevents failed\n");
+	    exit(1);
+	}
+
+	for (i = 0; i < ret; i++) {
+	    LinuxAIOCB *aiocb = (LinuxAIOCB *)(unsigned long)events[i].data;
+	    int res = events[i].res > 0 ? 0 : events[i].res; /* >0: byte count */
+
+	    aiocb->common.cb(aiocb->common.opaque, res);
+	    qemu_aio_release(aiocb);
+	    outstanding_requests--;
+	}
+    }
+}
+
+static void la_init(void)
+{   /* Create the completion eventfd and the AIO context; exits on failure. */
+    aio_efd = eventfd(0);
+    if (aio_efd == -1) {
+	fprintf(stderr, "failed to allocate aio fd: %m\n");
+	exit(1);
+    }
+
+    if (io_setup(256, &aio_ctxt_id) == -1) {	/* context sized for 256 requests */
+	fprintf(stderr, "failed to initialize linux aio: %m\n");
+	exit(1);
+    }
+
+    qemu_set_fd_handler2(aio_efd, NULL, la_completion, NULL, NULL);
+}
+
+static AIODriver linux_aio_drv = {	/* passed to qemu_register_aio() below */
+    .name = "linux",			/* presumably what "-aio linux" selects */
+    .aiocb_size = sizeof(LinuxAIOCB),	/* full per-request struct, not just common */
+    .aio_init = la_init,
+    .aio_wait = la_wait,
+    .aio_flush = la_flush,
+    .aio_submit = la_submit,
+    .aio_cancel = la_cancel,
+};
+
+int linux_aio_init(void)
+{   /* Registers linux_aio_drv and returns qemu_register_aio()'s result. */
+    return qemu_register_aio(&linux_aio_drv);
+}
diff --git a/block-aio.h b/block-aio.h
index 2fe8c58..6e82cb5 100644
--- a/block-aio.h
+++ b/block-aio.h
@@ -42,5 +42,6 @@ int qemu_set_aio_driver(const char *name);
 extern AIODriver *aio_drv;
 
 int posix_aio_init(void);
+int linux_aio_init(void);
 
 #endif
diff --git a/block.c b/block.c
index 44cb747..259bf3a 100644
--- a/block.c
+++ b/block.c
@@ -1349,6 +1349,11 @@ void bdrv_init(void)
     bdrv_register(&bdrv_qcow2);
     bdrv_register(&bdrv_parallels);
 #ifndef _WIN32
+#ifndef QEMU_IMG
+#ifdef CONFIG_LINUX_AIO
+    linux_aio_init();
+#endif
+#endif
     posix_aio_init();
 #endif
 }
diff --git a/configure b/configure
index 85cb68a..95fb88f 100755
--- a/configure
+++ b/configure
@@ -109,6 +109,7 @@ darwin_user="no"
 build_docs="no"
 uname_release=""
 curses="yes"
+linux_aio="yes"
 
 # OS specific
 targetos=`uname -s`
@@ -326,6 +327,8 @@ for opt do
   ;;
   --disable-curses) curses="no"
   ;;
+  --disable-linux-aio) linux_aio="no"
+  ;;
   *) echo "ERROR: unknown option $opt"; show_help="yes"
   ;;
   esac
@@ -418,6 +421,7 @@ echo "  --enable-fmod            enable FMOD audio driver"
 echo "  --enable-dsound          enable DirectSound audio driver"
 echo "  --disable-vnc-tls        disable TLS encryption for VNC server"
 echo "  --disable-curses         disable curses output"
+echo "  --disable-linux-aio      disable Linux AIO support"
 echo "  --enable-system          enable all system emulation targets"
 echo "  --disable-system         disable all system emulation targets"
 echo "  --enable-linux-user      enable all linux usermode emulation targets"
@@ -687,6 +691,24 @@ EOF
   fi
 fi # test "$curses"
 
+# linux aio probe
+
+if test "$linux_aio" = "yes" ; then
+  linux_aio=no
+  cat > $TMPC <<EOF
+#include <linux/aio_abi.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#ifndef SYS_eventfd
+#error No eventfd support
+#endif
+int main(void) { struct iocb iocb; (void)iocb.aio_resfd; return 0; }
+EOF
+  if $cc $ARCH_CFLAGS -o $TMPE $TMPC 2> /dev/null ; then
+    linux_aio=yes
+  fi
+fi
+
 # Check if tools are available to build documentation.
 if [ -x "`which texi2html 2>/dev/null`" ] && \
    [ -x "`which pod2man 2>/dev/null`" ]; then
@@ -738,6 +760,7 @@ echo "SDL support       $sdl"
 if test "$sdl" != "no" ; then
     echo "SDL static link   $sdl_static"
 fi
+echo "Linux AIO support $linux_aio"
 echo "curses support    $curses"
 echo "mingw32 support   $mingw32"
 echo "Adlib support     $adlib"
@@ -1001,6 +1024,10 @@ if test "$curses" = "yes" ; then
   echo "CONFIG_CURSES=yes" >> $config_mak
   echo "CURSES_LIBS=-lcurses" >> $config_mak
 fi
+if test "$linux_aio" = "yes" ; then
+  echo "#define CONFIG_LINUX_AIO 1" >> $config_h
+  echo "CONFIG_LINUX_AIO=yes" >> $config_mak
+fi
 
 # XXX: suppress that
 if [ "$bsd" = "yes" ] ; then

  parent reply	other threads:[~2008-04-17 19:34 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-04-17 19:26 [Qemu-devel] [PATCH 1/3] Refactor AIO interface to allow other AIO implementations Anthony Liguori
2008-04-17 19:26 ` [Qemu-devel] [PATCH 2/3] Split out posix-aio code Anthony Liguori
2008-04-17 19:26 ` Anthony Liguori [this message]
2008-04-18 15:09   ` [Qemu-devel] Re: [PATCH 3/3] Implement linux-aio backend Marcelo Tosatti
2008-04-18 15:18     ` Anthony Liguori
2008-04-18 17:46       ` Marcelo Tosatti
2008-04-17 19:38 ` [Qemu-devel] Re: [kvm-devel] [PATCH 1/3] Refactor AIO interface to allow other AIO implementations Daniel P. Berrange
2008-04-17 19:41   ` Anthony Liguori
2008-04-17 20:00     ` Daniel P. Berrange
2008-04-17 20:05       ` Anthony Liguori
2008-04-18 12:43       ` Jamie Lokier
2008-04-18 15:23         ` Anthony Liguori
2008-04-18 16:22           ` Jamie Lokier
2008-04-18 16:32           ` [kvm-devel] [Qemu-devel] " Avi Kivity
2008-04-20 15:49             ` Jamie Lokier
2008-04-20 18:43               ` Avi Kivity
2008-04-20 23:39                 ` Jamie Lokier
2008-04-21  6:39                   ` Avi Kivity
2008-04-21 12:10                     ` Jamie Lokier
2008-04-22  8:10                       ` Avi Kivity
2008-04-22 14:28                         ` Jamie Lokier
2008-04-22 14:53                           ` Anthony Liguori
2008-04-22 15:05                             ` Avi Kivity
2008-04-22 15:23                               ` Jamie Lokier
2008-04-22 15:12                             ` Jamie Lokier
2008-04-22 15:03                           ` Avi Kivity
2008-04-22 15:36                             ` Jamie Lokier
2008-05-02 16:37                               ` Antonio Vargas
2008-05-02 17:18                                 ` Jamie Lokier
2008-05-02 17:52                                   ` Anthony Liguori
2008-05-02 18:24                                     ` Jamie Lokier

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1208460412-27567-3-git-send-email-aliguori@us.ibm.com \
    --to=aliguori@us.ibm.com \
    --cc=kvm-devel@lists.sourceforge.net \
    --cc=mtosatti@redhat.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).