All of lore.kernel.org
 help / color / mirror / Atom feed
From: Marcelo Tosatti <mtosatti@redhat.com>
To: Liran Schour <LIRANS@il.ibm.com>, Kevin Wolf <kwolf@redhat.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>,
	Yoshiaki Tamura <tamura.yoshiaki@lab.ntt.co.jp>,
	qemu-devel@nongnu.org
Subject: [Qemu-devel] [patch 3/3] block migration: do not submit multiple AIOs for same sector
Date: Mon, 08 Nov 2010 17:02:56 -0200	[thread overview]
Message-ID: <20101108190918.201486474@redhat.com> (raw)
In-Reply-To: 20101108190253.560821111@redhat.com

[-- Attachment #1: 03-block-migration-inflight-read --]
[-- Type: text/plain, Size: 5041 bytes --]

Block migration can submit multiple AIO reads for the same sector/chunk, but
completion of such reads can happen out of order:

migration               guest
- get_dirty(N)
- aio_read(N) 
- clear_dirty(N)
                        write(N)
                        set_dirty(N)
- get_dirty(N)
- aio_read(N)

If the first aio_read completes after the second, stale data will be 
migrated to the destination.

Fix by not allowing multiple AIOs inflight for the same sector.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>


Index: qemu-kvm/block-migration.c
===================================================================
--- qemu-kvm.orig/block-migration.c
+++ qemu-kvm/block-migration.c
@@ -49,12 +49,14 @@ typedef struct BlkMigDevState {
     int64_t total_sectors;
     int64_t dirty;
     QSIMPLEQ_ENTRY(BlkMigDevState) entry;
+    unsigned long *aio_bitmap;
 } BlkMigDevState;
 
 typedef struct BlkMigBlock {
     uint8_t *buf;
     BlkMigDevState *bmds;
     int64_t sector;
+    int nr_sectors;
     struct iovec iov;
     QEMUIOVector qiov;
     BlockDriverAIOCB *aiocb;
@@ -140,6 +142,57 @@ static inline long double compute_read_b
     return  (block_mig_state.reads * BLOCK_SIZE)/ block_mig_state.total_time;
 }
 
+static int bmds_aio_inflight(BlkMigDevState *bmds, int64_t sector)
+{
+    int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
+
+    if (bmds->aio_bitmap &&
+        (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bmds->bs)) {
+        return !!(bmds->aio_bitmap[chunk / (sizeof(unsigned long) * 8)] &
+            (1UL << (chunk % (sizeof(unsigned long) * 8))));
+    } else {
+        return 0;
+    }
+}
+
+static void bmds_set_aio_inflight(BlkMigDevState *bmds, int64_t sector_num,
+                             int nb_sectors, int set)
+{
+    int64_t start, end;
+    unsigned long val, idx, bit;
+
+    start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
+    end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
+
+    for (; start <= end; start++) {
+        idx = start / (sizeof(unsigned long) * 8);
+        bit = start % (sizeof(unsigned long) * 8);
+        val = bmds->aio_bitmap[idx];
+        if (set) {
+            if (!(val & (1UL << bit))) {
+                val |= 1UL << bit;
+            }
+        } else {
+            if (val & (1UL << bit)) {
+                val &= ~(1UL << bit);
+            }
+        }
+        bmds->aio_bitmap[idx] = val;
+    }
+}
+
+static void alloc_aio_bitmap(BlkMigDevState *bmds)
+{
+    BlockDriverState *bs = bmds->bs;
+    int64_t bitmap_size;
+
+    bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
+            BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
+    bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
+
+    bmds->aio_bitmap = qemu_mallocz(bitmap_size);
+}
+
 static void blk_mig_read_cb(void *opaque, int ret)
 {
     BlkMigBlock *blk = opaque;
@@ -151,6 +204,7 @@ static void blk_mig_read_cb(void *opaque
     add_avg_read_time(blk->time);
 
     QSIMPLEQ_INSERT_TAIL(&block_mig_state.blk_list, blk, entry);
+    bmds_set_aio_inflight(blk->bmds, blk->sector, blk->nr_sectors, 0);
 
     block_mig_state.submitted--;
     block_mig_state.read_done++;
@@ -194,6 +248,7 @@ static int mig_save_device_bulk(Monitor 
     blk->buf = qemu_malloc(BLOCK_SIZE);
     blk->bmds = bmds;
     blk->sector = cur_sector;
+    blk->nr_sectors = nr_sectors;
 
     blk->iov.iov_base = blk->buf;
     blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
@@ -248,6 +303,7 @@ static void init_blk_migration_it(void *
         bmds->total_sectors = sectors;
         bmds->completed_sectors = 0;
         bmds->shared_base = block_mig_state.shared_base;
+        alloc_aio_bitmap(bmds);
 
         block_mig_state.total_sector_sum += sectors;
 
@@ -329,6 +385,8 @@ static int mig_save_device_dirty(Monitor
     int nr_sectors;
 
     for (sector = bmds->cur_dirty; sector < bmds->total_sectors;) {
+        if (bmds_aio_inflight(bmds, sector))
+            qemu_aio_flush();
         if (bdrv_get_dirty(bmds->bs, sector)) {
 
             if (total_sectors - sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
@@ -340,6 +398,7 @@ static int mig_save_device_dirty(Monitor
             blk->buf = qemu_malloc(BLOCK_SIZE);
             blk->bmds = bmds;
             blk->sector = sector;
+            blk->nr_sectors = nr_sectors;
 
             if (is_async) {
                 blk->iov.iov_base = blk->buf;
@@ -354,6 +413,7 @@ static int mig_save_device_dirty(Monitor
                     goto error;
                 }
                 block_mig_state.submitted++;
+                bmds_set_aio_inflight(bmds, sector, nr_sectors, 1);
             } else {
                 if (bdrv_read(bmds->bs, sector, blk->buf,
                               nr_sectors) < 0) {
@@ -474,6 +534,7 @@ static void blk_mig_cleanup(Monitor *mon
 
     while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) {
         QSIMPLEQ_REMOVE_HEAD(&block_mig_state.bmds_list, entry);
+        qemu_free(bmds->aio_bitmap);
         qemu_free(bmds);
     }
 

  parent reply	other threads:[~2010-11-08 19:14 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-11-08 19:02 [Qemu-devel] [patch 0/3] block migration fixes Marcelo Tosatti
2010-11-08 19:02 ` [Qemu-devel] [patch 1/3] block: fix shift in dirty bitmap calculation Marcelo Tosatti
2010-11-09 12:02   ` [Qemu-devel] " Kevin Wolf
2010-11-08 19:02 ` [Qemu-devel] [patch 2/3] block: set sector dirty on AIO write completion Marcelo Tosatti
2010-11-09 12:02   ` [Qemu-devel] " Kevin Wolf
2010-11-08 19:02 ` Marcelo Tosatti [this message]
2010-11-09 12:42   ` [Qemu-devel] Re: [patch 3/3] block migration: do not submit multiple AIOs for same sector Kevin Wolf
2010-11-09 13:49     ` Marcelo Tosatti
2010-11-09  6:02 ` [Qemu-devel] Re: [patch 0/3] block migration fixes Yoshiaki Tamura
2010-11-09 13:08   ` Marcelo Tosatti
2010-11-11  9:06     ` Yoshiaki Tamura
2010-11-12 15:56       ` Marcelo Tosatti
2010-11-21 15:22 ` [Qemu-devel] " Anthony Liguori
2010-11-22  9:43   ` Kevin Wolf

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20101108190918.201486474@redhat.com \
    --to=mtosatti@redhat.com \
    --cc=LIRANS@il.ibm.com \
    --cc=kwolf@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=tamura.yoshiaki@lab.ntt.co.jp \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.