All of lore.kernel.org
 help / color / mirror / Atom feed
From: Christoph Hellwig <hch@lst.de>
To: qemu-devel@nongnu.org
Subject: [Qemu-devel] [PATCH 3/4] block: add bdrv_aio_flush operation
Date: Mon, 31 Aug 2009 22:17:30 +0200	[thread overview]
Message-ID: <20090831201730.GC4874@lst.de> (raw)
In-Reply-To: <20090831201627.GA4811@lst.de>


Instead of stalling the VCPU while serving a cache flush, try to do it
asynchronously.  Use our good old helper thread pool to issue an
asynchronous fdatasync for raw-posix.  Note that while Linux AIO
implements a fdatasync operation, it is not useful for us because
it isn't actually implemented in an asynchronous fashion.

For now only use it in IDE because virtio-blk doesn't implement
cache flushing yet (will be fixed in patch 4/4) and the interface between
the HBA emulation and scsi-disk will need some changes to accommodate
it for scsi (will be a separate patch series).

Signed-off-by: Christoph Hellwig <hch@lst.de>

Index: qemu/block.c
===================================================================
--- qemu.orig/block.c	2009-08-31 16:49:54.508542113 -0300
+++ qemu/block.c	2009-08-31 16:49:59.593042021 -0300
@@ -54,6 +54,8 @@ static BlockDriverAIOCB *bdrv_aio_readv_
 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque);
+static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
+        BlockDriverCompletionFunc *cb, void *opaque);
 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
                         uint8_t *buf, int nb_sectors);
 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
@@ -138,6 +140,10 @@ void bdrv_register(BlockDriver *bdrv)
         bdrv->bdrv_read = bdrv_read_em;
         bdrv->bdrv_write = bdrv_write_em;
     }
+
+    if (!bdrv->bdrv_aio_flush)
+        bdrv->bdrv_aio_flush = bdrv_aio_flush_em;
+
     bdrv->next = first_drv;
     first_drv = bdrv;
 }
@@ -1369,6 +1375,21 @@ BlockDriverAIOCB *bdrv_aio_writev(BlockD
     return ret;
 }
 
+BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
+				 BlockDriverCompletionFunc *cb, void *opaque)
+{
+    BlockDriver *drv = bs->drv;
+
+    if (!drv)
+        return NULL;
+
+    /*
+     * Note that unlike bdrv_flush the driver is responsible for flushing a
+     * backing image if it exists.
+     */
+    return drv->bdrv_aio_flush(bs, cb, opaque);
+}
+
 void bdrv_aio_cancel(BlockDriverAIOCB *acb)
 {
     acb->pool->cancel(acb);
@@ -1459,6 +1480,25 @@ static BlockDriverAIOCB *bdrv_aio_writev
     return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
 }
 
+static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    BlockDriverAIOCBSync *acb;
+
+    acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
+    acb->is_write = 1; /* don't bounce in the completion hadler */
+    acb->qiov = NULL;
+    acb->bounce = NULL;
+    acb->ret = 0;
+
+    if (!acb->bh)
+        acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
+
+    bdrv_flush(bs);
+    qemu_bh_schedule(acb->bh);
+    return &acb->common;
+}
+
 /**************************************************************/
 /* sync block device emulation */
 
Index: qemu/block.h
===================================================================
--- qemu.orig/block.h	2009-08-31 16:49:54.516577491 -0300
+++ qemu/block.h	2009-08-31 16:49:59.593042021 -0300
@@ -85,6 +85,8 @@ BlockDriverAIOCB *bdrv_aio_readv(BlockDr
 BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
                                   QEMUIOVector *iov, int nb_sectors,
                                   BlockDriverCompletionFunc *cb, void *opaque);
+BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
+				 BlockDriverCompletionFunc *cb, void *opaque);
 void bdrv_aio_cancel(BlockDriverAIOCB *acb);
 
 /* sg packet commands */
Index: qemu/block_int.h
===================================================================
--- qemu.orig/block_int.h	2009-08-31 16:49:54.512583129 -0300
+++ qemu/block_int.h	2009-08-31 16:49:59.597095469 -0300
@@ -69,6 +69,8 @@ struct BlockDriver {
     BlockDriverAIOCB *(*bdrv_aio_writev)(BlockDriverState *bs,
         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque);
+    BlockDriverAIOCB *(*bdrv_aio_flush)(BlockDriverState *bs,
+        BlockDriverCompletionFunc *cb, void *opaque);
 
     const char *protocol_name;
     int (*bdrv_truncate)(BlockDriverState *bs, int64_t offset);
Index: qemu/hw/ide/core.c
===================================================================
--- qemu.orig/hw/ide/core.c	2009-08-31 16:49:54.516577491 -0300
+++ qemu/hw/ide/core.c	2009-08-31 16:49:59.601041920 -0300
@@ -771,6 +771,16 @@ static void ide_atapi_cmd_check_status(I
     ide_set_irq(s);
 }
 
+static void ide_flush_cb(void *opaque, int ret)
+{
+    IDEState *s = opaque;
+
+    /* XXX: how do we signal I/O errors here? */
+
+    s->status = READY_STAT | SEEK_STAT;
+    ide_set_irq(s);
+}
+
 static inline void cpu_to_ube16(uint8_t *buf, int val)
 {
     buf[0] = val >> 8;
@@ -1969,9 +1979,9 @@ void ide_ioport_write(void *opaque, uint
         case WIN_FLUSH_CACHE:
         case WIN_FLUSH_CACHE_EXT:
             if (s->bs)
-                bdrv_flush(s->bs);
-	    s->status = READY_STAT | SEEK_STAT;
-            ide_set_irq(s);
+                bdrv_aio_flush(s->bs, ide_flush_cb, s);
+            else
+                ide_flush_cb(s, 0);
             break;
         case WIN_STANDBY:
         case WIN_STANDBY2:
Index: qemu/block/raw-posix-aio.h
===================================================================
--- qemu.orig/block/raw-posix-aio.h	2009-08-27 23:50:52.510770924 -0300
+++ qemu/block/raw-posix-aio.h	2009-08-31 16:49:59.605095368 -0300
@@ -17,8 +17,9 @@
 #define QEMU_AIO_READ         0x0001
 #define QEMU_AIO_WRITE        0x0002
 #define QEMU_AIO_IOCTL        0x0004
+#define QEMU_AIO_FLUSH        0x0008
 #define QEMU_AIO_TYPE_MASK \
-	(QEMU_AIO_READ|QEMU_AIO_WRITE|QEMU_AIO_IOCTL)
+	(QEMU_AIO_READ|QEMU_AIO_WRITE|QEMU_AIO_IOCTL|QEMU_AIO_FLUSH)
 
 /* AIO flags */
 #define QEMU_AIO_MISALIGNED   0x1000
Index: qemu/block/raw-posix.c
===================================================================
--- qemu.orig/block/raw-posix.c	2009-08-31 16:49:55.513071598 -0300
+++ qemu/block/raw-posix.c	2009-08-31 16:49:59.613070264 -0300
@@ -574,6 +574,18 @@ static BlockDriverAIOCB *raw_aio_writev(
                           cb, opaque, QEMU_AIO_WRITE);
 }
 
+static BlockDriverAIOCB *raw_aio_flush(BlockDriverState *bs,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    BDRVRawState *s = bs->opaque;
+
+    if (fd_open(bs) < 0)
+        return NULL;
+
+    return paio_submit(bs, s->aio_ctx, s->fd, 0, NULL, 0,
+    		       cb, opaque, QEMU_AIO_FLUSH);
+}
+
 static void raw_close(BlockDriverState *bs)
 {
     BDRVRawState *s = bs->opaque;
@@ -749,6 +761,7 @@ static BlockDriver bdrv_raw = {
 
     .bdrv_aio_readv = raw_aio_readv,
     .bdrv_aio_writev = raw_aio_writev,
+    .bdrv_aio_flush = raw_aio_flush,
 
     .bdrv_truncate = raw_truncate,
     .bdrv_getlength = raw_getlength,
@@ -1002,6 +1015,7 @@ static BlockDriver bdrv_host_device = {
 
     .bdrv_aio_readv	= raw_aio_readv,
     .bdrv_aio_writev	= raw_aio_writev,
+    .bdrv_aio_flush	= raw_aio_flush,
 
     .bdrv_read          = raw_read,
     .bdrv_write         = raw_write,
@@ -1096,6 +1110,7 @@ static BlockDriver bdrv_host_floppy = {
 
     .bdrv_aio_readv     = raw_aio_readv,
     .bdrv_aio_writev    = raw_aio_writev,
+    .bdrv_aio_flush	= raw_aio_flush,
 
     .bdrv_read          = raw_read,
     .bdrv_write         = raw_write,
@@ -1176,6 +1191,7 @@ static BlockDriver bdrv_host_cdrom = {
 
     .bdrv_aio_readv     = raw_aio_readv,
     .bdrv_aio_writev    = raw_aio_writev,
+    .bdrv_aio_flush	= raw_aio_flush,
 
     .bdrv_read          = raw_read,
     .bdrv_write         = raw_write,
@@ -1295,6 +1311,7 @@ static BlockDriver bdrv_host_cdrom = {
 
     .bdrv_aio_readv     = raw_aio_readv,
     .bdrv_aio_writev    = raw_aio_writev,
+    .bdrv_aio_flush	= raw_aio_flush,
 
     .bdrv_read          = raw_read,
     .bdrv_write         = raw_write,
Index: qemu/posix-aio-compat.c
===================================================================
--- qemu.orig/posix-aio-compat.c	2009-08-27 23:50:52.654237211 -0300
+++ qemu/posix-aio-compat.c	2009-08-31 16:49:59.621095866 -0300
@@ -134,6 +134,16 @@ static size_t handle_aiocb_ioctl(struct 
 	return aiocb->aio_nbytes;
 }
 
+static size_t handle_aiocb_flush(struct qemu_paiocb *aiocb)
+{
+	int ret;
+
+	ret = fdatasync(aiocb->aio_fildes);
+	if (ret == -1)
+		return -errno;
+	return 0;
+}
+
 #ifdef CONFIG_PREADV
 
 static ssize_t
@@ -330,6 +340,9 @@ static void *aio_thread(void *unused)
         case QEMU_AIO_WRITE:
 		ret = handle_aiocb_rw(aiocb);
 		break;
+	case QEMU_AIO_FLUSH:
+		ret = handle_aiocb_flush(aiocb);
+		break;
         case QEMU_AIO_IOCTL:
 		ret = handle_aiocb_ioctl(aiocb);
 		break;
@@ -530,8 +543,10 @@ BlockDriverAIOCB *paio_submit(BlockDrive
     acb->aio_type = type;
     acb->aio_fildes = fd;
     acb->ev_signo = SIGUSR2;
-    acb->aio_iov = qiov->iov;
-    acb->aio_niov = qiov->niov;
+    if (qiov) {
+        acb->aio_iov = qiov->iov;
+        acb->aio_niov = qiov->niov;
+    }
     acb->aio_nbytes = nb_sectors * 512;
     acb->aio_offset = sector_num * 512;
 

  parent reply	other threads:[~2009-08-31 20:17 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-08-31 20:16 [Qemu-devel] [PATCH 0/4] data integrity fixes Christoph Hellwig
2009-08-31 20:16 ` [Qemu-devel] [PATCH 1/4] block: add enable_write_cache flag Christoph Hellwig
2009-08-31 22:09   ` Jamie Lokier
2009-08-31 22:16     ` Christoph Hellwig
2009-08-31 22:46       ` Jamie Lokier
2009-08-31 23:06         ` Christoph Hellwig
2009-09-01 10:38           ` Jamie Lokier
2009-08-31 22:53       ` Anthony Liguori
2009-08-31 22:55         ` Jamie Lokier
2009-08-31 22:58         ` Christoph Hellwig
2009-08-31 22:59         ` Jamie Lokier
2009-08-31 23:06           ` Christoph Hellwig
2009-08-31 23:09             ` Christoph Hellwig
2009-09-02  3:53         ` Christoph Hellwig
2009-09-02 13:13           ` Anthony Liguori
2009-09-02 14:14             ` Christoph Hellwig
2009-09-02 19:49             ` Christoph Hellwig
2009-08-31 20:17 ` [Qemu-devel] [PATCH 2/4] block: use fdatasync instead of fsync Christoph Hellwig
2009-08-31 21:51   ` Jamie Lokier
2009-08-31 21:55     ` Christoph Hellwig
2009-08-31 22:48       ` Jamie Lokier
2009-08-31 22:57         ` Christoph Hellwig
2009-09-01 15:59   ` Blue Swirl
2009-09-01 16:04     ` Christoph Hellwig
2009-09-02  0:34       ` Jamie Lokier
2009-09-02  0:37         ` Christoph Hellwig
2009-09-02  1:18           ` Jamie Lokier
2009-09-02 14:02           ` Blue Swirl
2009-09-02 14:15             ` Christoph Hellwig
2009-08-31 20:17 ` Christoph Hellwig [this message]
2009-09-01 10:24   ` [Qemu-devel] [PATCH 3/4] block: add bdrv_aio_flush operation Avi Kivity
2009-09-01 14:25     ` Christoph Hellwig
2009-08-31 20:18 ` [Qemu-devel] [PATCH 4/4] virtio-blk: add volatile writecache feature Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20090831201730.GC4874@lst.de \
    --to=hch@lst.de \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.