All of lore.kernel.org
 help / color / mirror / Atom feed
From: Li Zhijian <lizhijian@cn.fujitsu.com>
To: Wen Congyang <wency@cn.fujitsu.com>,
	"Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: den@openvz.org, qemu-devel@nongnu.org,
	Juan Quintela <quintela@redhat.com>
Subject: Re: [Qemu-devel] safety of migration_bitmap_extend
Date: Fri, 13 Nov 2015 16:55:31 +0800	[thread overview]
Message-ID: <5645A583.7030701@cn.fujitsu.com> (raw)
In-Reply-To: <56444EED.4060104@cn.fujitsu.com>


[-- Attachment #1.1: Type: text/plain, Size: 930 bytes --]

On 11/12/2015 04:33 PM, Wen Congyang wrote:

>> >Imagine that migration_dirty_pages is slightly too small and we enter ram_save_iterate;
>> >ram_save_iterate now sends*all*  it's pages, it decrements migration_dirty_pages for
>> >every page sent.  At the end of ram_save_iterate, migration_dirty_pages would be negative.
>> >But migration_dirty_pages is *u*int64_t; so we exit ram_save_iterate,
>> >go around the main migration_thread loop again and call qemu_savevm_state_pending, and
>> >it returns a very large number (because it's actually a negative number), so we keep
>> >going around the loop, because it never gets smaller.
> I don't know how to trigger the problem. I think store migration_dirty_pages in BitmapRcu
> can fix this problem.
>
>
hi, David

It seem that it's not easy to reproduce this problem in my environment.
and the following 2 patches are to fix this issue, can you help to review and test.

thx

Li


[-- Attachment #1.2: Type: text/html, Size: 1804 bytes --]

[-- Attachment #2: 0002-migration-use-rcu-to-protect-migration_dirty_pages.patch --]
[-- Type: text/x-patch, Size: 4978 bytes --]

>From 81f27571b3ec2be2afb76576b47f42fb96a06411 Mon Sep 17 00:00:00 2001
From: Wen Congyang <wency@cn.fujitsu.com>
Date: Fri, 13 Nov 2015 13:16:25 +0800
Subject: [PATCH 2/2] migration: use rcu to protect migration_dirty_pages

Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
---
 migration/ram.c | 30 ++++++++++++++++++++----------
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/migration/ram.c b/migration/ram.c
index 7f32696..ef259f9 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -221,7 +221,6 @@ static RAMBlock *last_seen_block;
 static RAMBlock *last_sent_block;
 static ram_addr_t last_offset;
 static QemuMutex migration_bitmap_mutex;
-static uint64_t migration_dirty_pages;
 static uint32_t last_version;
 static bool ram_bulk_stage;
 
@@ -246,6 +245,7 @@ static struct BitmapRcu {
      * of the postcopy phase
      */
     unsigned long *unsentmap;
+    uint64_t dirty_pages;
 } *migration_bitmap_rcu;
 
 struct CompressParam {
@@ -580,7 +580,7 @@ static inline bool migration_bitmap_clear_dirty(ram_addr_t addr)
     ret = test_and_clear_bit(nr, bitmap);
 
     if (ret) {
-        migration_dirty_pages--;
+        atomic_rcu_read(&migration_bitmap_rcu)->dirty_pages--;
     }
     return ret;
 }
@@ -589,7 +589,7 @@ static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length)
 {
     unsigned long *bitmap;
     bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
-    migration_dirty_pages +=
+    atomic_rcu_read(&migration_bitmap_rcu)->dirty_pages +=
         cpu_physical_memory_sync_dirty_bitmap(bitmap, start, length);
 }
 
@@ -613,7 +613,7 @@ static void migration_bitmap_sync_init(void)
 static void migration_bitmap_sync(void)
 {
     RAMBlock *block;
-    uint64_t num_dirty_pages_init = migration_dirty_pages;
+    uint64_t num_dirty_pages_init, num_dirty_pages_new;
     MigrationState *s = migrate_get_current();
     int64_t end_time;
     int64_t bytes_xfer_now;
@@ -633,15 +633,17 @@ static void migration_bitmap_sync(void)
 
     qemu_mutex_lock(&migration_bitmap_mutex);
     rcu_read_lock();
+    num_dirty_pages_init = atomic_rcu_read(&migration_bitmap_rcu)->dirty_pages;
     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
         migration_bitmap_sync_range(block->offset, block->used_length);
     }
+    num_dirty_pages_new = atomic_rcu_read(&migration_bitmap_rcu)->dirty_pages;
     rcu_read_unlock();
     qemu_mutex_unlock(&migration_bitmap_mutex);
 
-    trace_migration_bitmap_sync_end(migration_dirty_pages
+    trace_migration_bitmap_sync_end(num_dirty_pages_new
                                     - num_dirty_pages_init);
-    num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init;
+    num_dirty_pages_period += num_dirty_pages_new - num_dirty_pages_init;
     end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
 
     /* more than 1 second = 1000 millisecons */
@@ -1364,7 +1366,13 @@ void acct_update_position(QEMUFile *f, size_t size, bool zero)
 
 static ram_addr_t ram_save_remaining(void)
 {
-    return migration_dirty_pages;
+    ram_addr_t dirty_pages;
+
+    rcu_read_lock();
+    dirty_pages = atomic_rcu_read(&migration_bitmap_rcu)->dirty_pages;
+    rcu_read_unlock();
+
+    return dirty_pages;
 }
 
 uint64_t ram_bytes_remaining(void)
@@ -1454,7 +1462,9 @@ void migration_bitmap_extend(ram_addr_t old, ram_addr_t new)
          */
         qemu_mutex_lock(&migration_bitmap_mutex);
         bitmap_copy(bitmap->bmap, old_bitmap->bmap, old);
+        bitmap->dirty_pages = bitmap_weight(bitmap->bmap, old);
         bitmap_set(bitmap->bmap, old, new - old);
+        bitmap->dirty_pages += new - old;
 
         /* We don't have a way to safely extend the sentmap
          * with RCU; so mark it as missing, entry to postcopy
@@ -1464,7 +1474,6 @@ void migration_bitmap_extend(ram_addr_t old, ram_addr_t new)
 
         atomic_rcu_set(&migration_bitmap_rcu, bitmap);
         qemu_mutex_unlock(&migration_bitmap_mutex);
-        migration_dirty_pages += new - old;
         call_rcu(old_bitmap, migration_bitmap_free, rcu);
     }
 }
@@ -1692,7 +1701,8 @@ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
                  * Remark them as dirty, updating the count for any pages
                  * that weren't previously dirty.
                  */
-                migration_dirty_pages += !test_and_set_bit(page, bitmap);
+                atomic_rcu_read(&migration_bitmap_rcu)->dirty_pages +=
+                    !test_and_set_bit(page, bitmap);
             }
         }
 
@@ -1924,7 +1934,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
      * Count the total number of pages used by ram blocks not including any
      * gaps due to alignment or unplugs.
      */
-    migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
+    migration_bitmap_rcu->dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
 
     memory_global_dirty_log_start();
     migration_bitmap_sync();
-- 
2.1.4


[-- Attachment #3: 0001-bitmap-add-bitmap_weight-api.patch --]
[-- Type: text/x-patch, Size: 1590 bytes --]

>From 7a2cffb8802e955ede85fb907e94a7172b4a0fa6 Mon Sep 17 00:00:00 2001
From: Li Zhijian <lizhijian@cn.fujitsu.com>
Date: Fri, 13 Nov 2015 15:51:48 +0800
Subject: [PATCH 1/2] bitmap: add bitmap_weight() api

count the number of bit set in bitmap

Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
---
 include/qemu/bitmap.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/include/qemu/bitmap.h b/include/qemu/bitmap.h
index 86dd9cd..6e48429 100644
--- a/include/qemu/bitmap.h
+++ b/include/qemu/bitmap.h
@@ -43,6 +43,7 @@
  * bitmap_clear(dst, pos, nbits)		Clear specified bit area
  * bitmap_test_and_clear_atomic(dst, pos, nbits)    Test and clear area
  * bitmap_find_next_zero_area(buf, len, pos, n, mask)	Find bit free area
+ * bitmap_weight(src, nbits)                    Hamming Weight: number set bits
  */
 
 /*
@@ -227,6 +228,23 @@ static inline int bitmap_intersects(const unsigned long *src1,
     }
 }
 
+static inline int bitmap_weight(const unsigned long *src, long nbits)
+{
+    int i, count = 0, nlong = nbits / BITS_PER_LONG;
+
+    if (small_nbits(nbits)) {
+        return hweight_long(*src & BITMAP_LAST_WORD_MASK(nbits));
+    }
+    for (i = 0; i < nlong; i++) {
+        count += hweight_long(src[i]);
+    }
+    if (nbits % BITS_PER_LONG) {
+        count += hweight_long(src[i] & BITMAP_LAST_WORD_MASK(nbits));
+    }
+
+    return count;
+}
+
 void bitmap_set(unsigned long *map, long i, long len);
 void bitmap_set_atomic(unsigned long *map, long i, long len);
 void bitmap_clear(unsigned long *map, long start, long nr);
-- 
2.1.4


      reply	other threads:[~2015-11-13  8:55 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-11-03 12:23 [Qemu-devel] safety of migration_bitmap_extend Dr. David Alan Gilbert
2015-11-03 12:55 ` Juan Quintela
2015-11-03 13:47   ` Dr. David Alan Gilbert
2015-11-04  3:10     ` Wen Congyang
2015-11-04  9:05       ` Dr. David Alan Gilbert
2015-11-04  9:13         ` Wen Congyang
2015-11-04  9:19           ` Dr. David Alan Gilbert
2015-11-12  8:33             ` Wen Congyang
2015-11-13  8:55               ` Li Zhijian [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5645A583.7030701@cn.fujitsu.com \
    --to=lizhijian@cn.fujitsu.com \
    --cc=den@openvz.org \
    --cc=dgilbert@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=quintela@redhat.com \
    --cc=wency@cn.fujitsu.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.