qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Marcelo Tosatti <mtosatti@redhat.com>
To: Liran Schour <LIRANS@il.ibm.com>, Kevin Wolf <kwolf@redhat.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>,
	Yoshiaki Tamura <tamura.yoshiaki@lab.ntt.co.jp>,
	qemu-devel@nongnu.org
Subject: [Qemu-devel] [patch 3/3] block migration: do not submit multiple AIOs for same sector
Date: Mon, 08 Nov 2010 17:02:56 -0200	[thread overview]
Message-ID: <20101108190918.201486474@redhat.com> (raw)
In-Reply-To: 20101108190253.560821111@redhat.com

[-- Attachment #1: 03-block-migration-inflight-read --]
[-- Type: text/plain, Size: 5041 bytes --]

Block migration can submit multiple AIO reads for the same sector/chunk, but
completion of such reads can happen out of order:

migration               guest
- get_dirty(N)
- aio_read(N) 
- clear_dirty(N)
                        write(N)
                        set_dirty(N)
- get_dirty(N)
- aio_read(N)

If the first aio_read completes after the second, stale data will be 
migrated to the destination.

Fix by not allowing multiple AIOs inflight for the same sector.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>


Index: qemu-kvm/block-migration.c
===================================================================
--- qemu-kvm.orig/block-migration.c
+++ qemu-kvm/block-migration.c
@@ -49,12 +49,14 @@ typedef struct BlkMigDevState {
     int64_t total_sectors;
     int64_t dirty;
     QSIMPLEQ_ENTRY(BlkMigDevState) entry;
+    unsigned long *aio_bitmap;
 } BlkMigDevState;
 
 typedef struct BlkMigBlock {
     uint8_t *buf;
     BlkMigDevState *bmds;
     int64_t sector;
+    int nr_sectors;
     struct iovec iov;
     QEMUIOVector qiov;
     BlockDriverAIOCB *aiocb;
@@ -140,6 +142,57 @@ static inline long double compute_read_b
     return  (block_mig_state.reads * BLOCK_SIZE)/ block_mig_state.total_time;
 }
 
+static int bmds_aio_inflight(BlkMigDevState *bmds, int64_t sector)
+{
+    int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
+
+    if (bmds->aio_bitmap &&
+        (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bmds->bs)) {
+        return !!(bmds->aio_bitmap[chunk / (sizeof(unsigned long) * 8)] &
+            (1UL << (chunk % (sizeof(unsigned long) * 8))));
+    } else {
+        return 0;
+    }
+}
+
+static void bmds_set_aio_inflight(BlkMigDevState *bmds, int64_t sector_num,
+                             int nb_sectors, int set)
+{
+    int64_t start, end;
+    unsigned long val, idx, bit;
+
+    start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
+    end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
+
+    for (; start <= end; start++) {
+        idx = start / (sizeof(unsigned long) * 8);
+        bit = start % (sizeof(unsigned long) * 8);
+        val = bmds->aio_bitmap[idx];
+        if (set) {
+            if (!(val & (1UL << bit))) {
+                val |= 1UL << bit;
+            }
+        } else {
+            if (val & (1UL << bit)) {
+                val &= ~(1UL << bit);
+            }
+        }
+        bmds->aio_bitmap[idx] = val;
+    }
+}
+
+static void alloc_aio_bitmap(BlkMigDevState *bmds)
+{
+    BlockDriverState *bs = bmds->bs;
+    int64_t bitmap_size;
+
+    bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
+            BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
+    bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
+
+    bmds->aio_bitmap = qemu_mallocz(bitmap_size);
+}
+
 static void blk_mig_read_cb(void *opaque, int ret)
 {
     BlkMigBlock *blk = opaque;
@@ -151,6 +204,7 @@ static void blk_mig_read_cb(void *opaque
     add_avg_read_time(blk->time);
 
     QSIMPLEQ_INSERT_TAIL(&block_mig_state.blk_list, blk, entry);
+    bmds_set_aio_inflight(blk->bmds, blk->sector, blk->nr_sectors, 0);
 
     block_mig_state.submitted--;
     block_mig_state.read_done++;
@@ -194,6 +248,7 @@ static int mig_save_device_bulk(Monitor 
     blk->buf = qemu_malloc(BLOCK_SIZE);
     blk->bmds = bmds;
     blk->sector = cur_sector;
+    blk->nr_sectors = nr_sectors;
 
     blk->iov.iov_base = blk->buf;
     blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
@@ -248,6 +303,7 @@ static void init_blk_migration_it(void *
         bmds->total_sectors = sectors;
         bmds->completed_sectors = 0;
         bmds->shared_base = block_mig_state.shared_base;
+        alloc_aio_bitmap(bmds);
 
         block_mig_state.total_sector_sum += sectors;
 
@@ -329,6 +385,8 @@ static int mig_save_device_dirty(Monitor
     int nr_sectors;
 
     for (sector = bmds->cur_dirty; sector < bmds->total_sectors;) {
+        if (bmds_aio_inflight(bmds, sector))
+            qemu_aio_flush();
         if (bdrv_get_dirty(bmds->bs, sector)) {
 
             if (total_sectors - sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
@@ -340,6 +398,7 @@ static int mig_save_device_dirty(Monitor
             blk->buf = qemu_malloc(BLOCK_SIZE);
             blk->bmds = bmds;
             blk->sector = sector;
+            blk->nr_sectors = nr_sectors;
 
             if (is_async) {
                 blk->iov.iov_base = blk->buf;
@@ -354,6 +413,7 @@ static int mig_save_device_dirty(Monitor
                     goto error;
                 }
                 block_mig_state.submitted++;
+                bmds_set_aio_inflight(bmds, sector, nr_sectors, 1);
             } else {
                 if (bdrv_read(bmds->bs, sector, blk->buf,
                               nr_sectors) < 0) {
@@ -474,6 +534,7 @@ static void blk_mig_cleanup(Monitor *mon
 
     while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) {
         QSIMPLEQ_REMOVE_HEAD(&block_mig_state.bmds_list, entry);
+        qemu_free(bmds->aio_bitmap);
         qemu_free(bmds);
     }
 

  parent reply	other threads:[~2010-11-08 19:14 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-11-08 19:02 [Qemu-devel] [patch 0/3] block migration fixes Marcelo Tosatti
2010-11-08 19:02 ` [Qemu-devel] [patch 1/3] block: fix shift in dirty bitmap calculation Marcelo Tosatti
2010-11-09 12:02   ` [Qemu-devel] " Kevin Wolf
2010-11-08 19:02 ` [Qemu-devel] [patch 2/3] block: set sector dirty on AIO write completion Marcelo Tosatti
2010-11-09 12:02   ` [Qemu-devel] " Kevin Wolf
2010-11-08 19:02 ` Marcelo Tosatti [this message]
2010-11-09 12:42   ` [Qemu-devel] Re: [patch 3/3] block migration: do not submit multiple AIOs for same sector Kevin Wolf
2010-11-09 13:49     ` Marcelo Tosatti
2010-11-09  6:02 ` [Qemu-devel] Re: [patch 0/3] block migration fixes Yoshiaki Tamura
2010-11-09 13:08   ` Marcelo Tosatti
2010-11-11  9:06     ` Yoshiaki Tamura
2010-11-12 15:56       ` Marcelo Tosatti
2010-11-21 15:22 ` [Qemu-devel] " Anthony Liguori
2010-11-22  9:43   ` Kevin Wolf

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20101108190918.201486474@redhat.com \
    --to=mtosatti@redhat.com \
    --cc=LIRANS@il.ibm.com \
    --cc=kwolf@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=tamura.yoshiaki@lab.ntt.co.jp \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).