qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Andrea Arcangeli <andrea@qumranet.com>
To: qemu-devel@nongnu.org
Subject: [Qemu-devel] [PATCH 2/2] fix bdrv_aio_read API breakage in qcow2
Date: Mon, 1 Sep 2008 12:53:14 +0200	[thread overview]
Message-ID: <20080901105314.GE25764@duo.random> (raw)
In-Reply-To: <20080901104356.GD25764@duo.random>

While testing the dma cancel patch (1/2) I noticed that qemu_aio_flush
was doing nothing at all, and that a flood of cmd_writeb commands, each
leading to a no-op invocation of qemu_aio_flush, was being executed.

I tracked it down and the major bug in this area (not sure if it could
be the one responsible for the fs corruption) is that if the aio callback
is run before bdrv_aio_read returns, the bm->aiocb of ide will be
non-null and set to the already completed aiocb, so after that a
cmd_writeb will be mistaken for a dma cancellation.

In short, all 'memset;goto redo' places must be fixed to use the bh and
must not call the callback in the context of bdrv_aio_read, or the
bdrv_aio_read model falls apart. Reading from qcow2 holes is possible
with physical readahead (kind of breada in linux buffer cache).

All the implications of this bug aren't clear due to the amount of code
affected (qcow2 itself with hd_aiocb in qcow_aio_cancel, scsi
etc..). IDE might have been safe by pure luck because of a DMAING
bitflag check before canceling the I/O, otherwise a double free would
happen there too. With this fix, patch 1/2 behaves correctly (aiocb is
always null after qemu_aio_flush returns).

The same bug exists in qcow of course; it can be fixed later as it's
less urgent.

Signed-off-by: Andrea Arcangeli <andrea@qumranet.com>

Index: Makefile.target
===================================================================
--- Makefile.target	(revision 5119)
+++ Makefile.target	(working copy)
@@ -474,9 +474,9 @@
 
 OBJS=vl.o osdep.o monitor.o pci.o loader.o isa_mmio.o machine.o net-checksum.o
 ifdef CONFIG_WIN32
-OBJS+=block-raw-win32.o
+OBJS+=block-raw-win32.o block-qcow2.o
 else
-OBJS+=block-raw-posix.o
+OBJS+=block-raw-posix.o block-qcow2.o
 endif
 
 LIBS+=-lz
Index: Makefile
===================================================================
--- Makefile	(revision 5119)
+++ Makefile	(working copy)
@@ -46,7 +46,7 @@
 BLOCK_OBJS=cutils.o qemu-malloc.o
 BLOCK_OBJS+=block-cow.o block-qcow.o aes.o block-vmdk.o block-cloop.o
 BLOCK_OBJS+=block-dmg.o block-bochs.o block-vpc.o block-vvfat.o
-BLOCK_OBJS+=block-qcow2.o block-parallels.o
+BLOCK_OBJS+=block-parallels.o
 ifndef CONFIG_WIN32
 BLOCK_OBJS+=block-nbd.o
 endif
@@ -175,9 +175,9 @@
 
 QEMU_IMG_BLOCK_OBJS = $(BLOCK_OBJS)
 ifdef CONFIG_WIN32
-QEMU_IMG_BLOCK_OBJS += qemu-img-block-raw-win32.o
+QEMU_IMG_BLOCK_OBJS += qemu-img-block-raw-win32.o qemu-img-block-qcow2.o
 else
-QEMU_IMG_BLOCK_OBJS += nbd.o qemu-img-block-raw-posix.o
+QEMU_IMG_BLOCK_OBJS += nbd.o qemu-img-block-raw-posix.o qemu-img-block-qcow2.o
 endif
 
 ######################################################################
@@ -195,7 +195,8 @@
 	$(CC) $(CFLAGS) $(CPPFLAGS) -DQEMU_NBD -c -o $@ $<
 
 qemu-nbd$(EXESUF):  qemu-nbd.o qemu-nbd-nbd.o qemu-img-block.o \
-		    osdep.o qemu-nbd-block-raw-posix.o $(BLOCK_OBJS)
+		    osdep.o qemu-nbd-block-raw-posix.o \
+		    qemu-nbd-block-qcow2.o $(BLOCK_OBJS)
 	$(CC) $(LDFLAGS) -o $@ $^ -lz $(LIBS)
 
 # dyngen host tool
Index: block-qcow2.c
===================================================================
--- block-qcow2.c	(revision 5119)
+++ block-qcow2.c	(working copy)
@@ -1169,8 +1169,20 @@
     uint64_t cluster_offset;
     uint8_t *cluster_data;
     BlockDriverAIOCB *hd_aiocb;
+    QEMUBH *bh;
 } QCowAIOCB;
 
+#if !defined(QEMU_IMG) && !defined(QEMU_NBD)
+static void qcow_aio_read_cb(void *opaque, int ret);
+static void qcow_aio_read_bh(void *opaque)
+{
+    QCowAIOCB *acb = opaque;
+    qemu_bh_delete(acb->bh);
+    acb->bh = NULL;
+    qcow_aio_read_cb(opaque, 0);
+}
+#endif
+
 static void qcow_aio_read_cb(void *opaque, int ret)
 {
     QCowAIOCB *acb = opaque;
@@ -1186,7 +1198,9 @@
         return;
     }
 
+#if defined(QEMU_IMG) || defined(QEMU_NBD)
  redo:
+#endif
     /* post process the read buffer */
     if (!acb->cluster_offset) {
         /* nothing to do */
@@ -1227,12 +1241,38 @@
                 if (acb->hd_aiocb == NULL)
                     goto fail;
             } else {
+#if defined(QEMU_IMG) || defined(QEMU_NBD)
                 goto redo;
+#else
+		if (acb->bh) {
+		    ret = -EIO;
+		    goto fail;
+		}
+		acb->bh = qemu_bh_new(qcow_aio_read_bh, acb);
+		if (!acb->bh) {
+		    ret = -EIO;
+		    goto fail;
+		}
+		qemu_bh_schedule(acb->bh);
+#endif
             }
         } else {
             /* Note: in this case, no need to wait */
             memset(acb->buf, 0, 512 * acb->n);
+#if defined(QEMU_IMG) || defined(QEMU_NBD)
             goto redo;
+#else
+	    if (acb->bh) {
+		ret = -EIO;
+		goto fail;
+	    }
+	    acb->bh = qemu_bh_new(qcow_aio_read_bh, acb);
+	    if (!acb->bh) {
+		ret = -EIO;
+		goto fail;
+	    }
+	    qemu_bh_schedule(acb->bh);
+#endif
         }
     } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) {
         /* add AIO support for compressed blocks ? */
@@ -1240,7 +1280,20 @@
             goto fail;
         memcpy(acb->buf,
                s->cluster_cache + index_in_cluster * 512, 512 * acb->n);
+#if defined(QEMU_IMG) || defined(QEMU_NBD)
         goto redo;
+#else
+	if (acb->bh) {
+	    ret = -EIO;
+	    goto fail;
+	}
+	acb->bh = qemu_bh_new(qcow_aio_read_bh, acb);
+	if (!acb->bh) {
+	    ret = -EIO;
+	    goto fail;
+	}
+	qemu_bh_schedule(acb->bh);
+#endif
     } else {
         if ((acb->cluster_offset & 511) != 0) {
             ret = -EIO;

  reply	other threads:[~2008-09-01 10:53 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-08-29 13:52 [Qemu-devel] [PATCH] ide_dma_cancel will result in partial DMA transfer Andrea Arcangeli
2008-09-01 10:43 ` [Qemu-devel] [PATCH 1/2] " Andrea Arcangeli
2008-09-01 10:53   ` Andrea Arcangeli [this message]
2008-10-22 14:14     ` [Qemu-devel] [PATCH] fix bdrv_aio_read API breakage in qcow2 Andrea Arcangeli
2008-10-27 13:49       ` Anthony Liguori
2008-10-31 17:32       ` Anthony Liguori
2009-01-14 18:06   ` [Qemu-devel] [PATCH] ide_dma_cancel will result in partial DMA transfer Andrea Arcangeli
2009-01-16 16:41     ` Ian Jackson
2009-01-22 19:02     ` Anthony Liguori
2009-02-26 16:43       ` Andrea Arcangeli
2008-09-01 11:21 ` Ian Jackson
2008-09-01 12:13   ` Andrea Arcangeli

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080901105314.GE25764@duo.random \
    --to=andrea@qumranet.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).