All of lore.kernel.org
 help / color / mirror / Atom feed
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
To: qemu-devel@nongnu.org
Cc: Fam Zheng <fam@euphon.net>,
	vsementsov@virtuozzo.com, qemu-block@nongnu.org,
	quintela@redhat.com, dgilbert@redhat.com,
	Stefan Hajnoczi <stefanha@redhat.com>,
	andrey.shinkevich@virtuozzo.com, John Snow <jsnow@redhat.com>
Subject: [PATCH v2 09/22] migration/block-dirty-bitmap: relax error handling in incoming part
Date: Mon, 17 Feb 2020 18:02:33 +0300	[thread overview]
Message-ID: <20200217150246.29180-10-vsementsov@virtuozzo.com> (raw)
In-Reply-To: <20200217150246.29180-1-vsementsov@virtuozzo.com>

Bitmap data is not critical, and we should not fail the migration (or
use postcopy recovery) because of a dirty-bitmap migration failure.
Instead, we should just drop the unfinished bitmaps.

We still have to report I/O stream violation errors, as they affect the
whole migration stream.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
---
 migration/block-dirty-bitmap.c | 148 +++++++++++++++++++++++++--------
 1 file changed, 113 insertions(+), 35 deletions(-)

diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c
index 1329db8d7d..aea5326804 100644
--- a/migration/block-dirty-bitmap.c
+++ b/migration/block-dirty-bitmap.c
@@ -145,6 +145,15 @@ typedef struct DBMLoadState {
 
     bool before_vm_start_handled; /* set in dirty_bitmap_mig_before_vm_start */
 
+    /*
+     * cancelled
+     * Incoming migration is cancelled for some reason. That means that we
+     * should still read our chunks from the migration stream, so as not to
+     * affect other migration objects (like RAM), but just ignore them and do
+     * not touch any bitmaps or nodes.
+     */
+    bool cancelled;
+
     GSList *bitmaps;
     QemuMutex lock; /* protect bitmaps */
 } DBMLoadState;
@@ -545,13 +554,47 @@ void dirty_bitmap_mig_before_vm_start(void)
     qemu_mutex_unlock(&s->lock);
 }
 
+static void cancel_incoming_locked(DBMLoadState *s)
+{
+    GSList *item;
+
+    if (s->cancelled) {
+        return;
+    }
+
+    s->cancelled = true;
+    s->bs = NULL;
+    s->bitmap = NULL;
+
+    /* Drop all unfinished bitmaps */
+    for (item = s->bitmaps; item; item = g_slist_next(item)) {
+        LoadBitmapState *b = item->data;
+
+        /*
+         * Bitmap must be unfinished, as finished bitmaps should already be
+         * removed from the list.
+         */
+        assert(!s->before_vm_start_handled || !b->migrated);
+        if (bdrv_dirty_bitmap_has_successor(b->bitmap)) {
+            bdrv_reclaim_dirty_bitmap(b->bitmap, &error_abort);
+        }
+        bdrv_release_dirty_bitmap(b->bitmap);
+    }
+
+    g_slist_free_full(s->bitmaps, g_free);
+    s->bitmaps = NULL;
+}
+
 static void dirty_bitmap_load_complete(QEMUFile *f, DBMLoadState *s)
 {
     GSList *item;
     trace_dirty_bitmap_load_complete();
-    bdrv_dirty_bitmap_deserialize_finish(s->bitmap);
 
-    qemu_mutex_lock(&s->lock);
+    if (s->cancelled) {
+        return;
+    }
+
+    bdrv_dirty_bitmap_deserialize_finish(s->bitmap);
 
     if (bdrv_dirty_bitmap_has_successor(s->bitmap)) {
         bdrv_reclaim_dirty_bitmap(s->bitmap, &error_abort);
@@ -569,8 +612,6 @@ static void dirty_bitmap_load_complete(QEMUFile *f, DBMLoadState *s)
             break;
         }
     }
-
-    qemu_mutex_unlock(&s->lock);
 }
 
 static int dirty_bitmap_load_bits(QEMUFile *f, DBMLoadState *s)
@@ -582,15 +623,32 @@ static int dirty_bitmap_load_bits(QEMUFile *f, DBMLoadState *s)
 
     if (s->flags & DIRTY_BITMAP_MIG_FLAG_ZEROES) {
         trace_dirty_bitmap_load_bits_zeroes();
-        bdrv_dirty_bitmap_deserialize_zeroes(s->bitmap, first_byte, nr_bytes,
-                                             false);
+        if (!s->cancelled) {
+            bdrv_dirty_bitmap_deserialize_zeroes(s->bitmap, first_byte,
+                                                 nr_bytes, false);
+        }
     } else {
         size_t ret;
         uint8_t *buf;
         uint64_t buf_size = qemu_get_be64(f);
-        uint64_t needed_size =
-            bdrv_dirty_bitmap_serialization_size(s->bitmap,
-                                                 first_byte, nr_bytes);
+        uint64_t needed_size;
+
+        buf = g_malloc(buf_size);
+        ret = qemu_get_buffer(f, buf, buf_size);
+        if (ret != buf_size) {
+            error_report("Failed to read bitmap bits");
+            g_free(buf);
+            return -EIO;
+        }
+
+        if (s->cancelled) {
+            g_free(buf);
+            return 0;
+        }
+
+        needed_size = bdrv_dirty_bitmap_serialization_size(s->bitmap,
+                                                           first_byte,
+                                                           nr_bytes);
 
         if (needed_size > buf_size ||
             buf_size > QEMU_ALIGN_UP(needed_size, 4 * sizeof(long))
@@ -599,15 +657,8 @@ static int dirty_bitmap_load_bits(QEMUFile *f, DBMLoadState *s)
             error_report("Migrated bitmap granularity doesn't "
                          "match the destination bitmap '%s' granularity",
                          bdrv_dirty_bitmap_name(s->bitmap));
-            return -EINVAL;
-        }
-
-        buf = g_malloc(buf_size);
-        ret = qemu_get_buffer(f, buf, buf_size);
-        if (ret != buf_size) {
-            error_report("Failed to read bitmap bits");
-            g_free(buf);
-            return -EIO;
+            cancel_incoming_locked(s);
+            return 0;
         }
 
         bdrv_dirty_bitmap_deserialize_part(s->bitmap, buf, first_byte, nr_bytes,
@@ -632,14 +683,16 @@ static int dirty_bitmap_load_header(QEMUFile *f, DBMLoadState *s)
             error_report("Unable to read node name string");
             return -EINVAL;
         }
-        s->bs = bdrv_lookup_bs(s->node_name, s->node_name, &local_err);
-        if (!s->bs) {
-            error_report_err(local_err);
-            return -EINVAL;
+        if (!s->cancelled) {
+            s->bs = bdrv_lookup_bs(s->node_name, s->node_name, &local_err);
+            if (!s->bs) {
+                error_report_err(local_err);
+                cancel_incoming_locked(s);
+            }
         }
-    } else if (!s->bs && !nothing) {
+    } else if (!s->bs && !nothing && !s->cancelled) {
         error_report("Error: block device name is not set");
-        return -EINVAL;
+        cancel_incoming_locked(s);
     }
 
     if (s->flags & DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME) {
@@ -647,24 +700,38 @@ static int dirty_bitmap_load_header(QEMUFile *f, DBMLoadState *s)
             error_report("Unable to read bitmap name string");
             return -EINVAL;
         }
-        s->bitmap = bdrv_find_dirty_bitmap(s->bs, s->bitmap_name);
-
-        /* bitmap may be NULL here, it wouldn't be an error if it is the
-         * first occurrence of the bitmap */
-        if (!s->bitmap && !(s->flags & DIRTY_BITMAP_MIG_FLAG_START)) {
-            error_report("Error: unknown dirty bitmap "
-                         "'%s' for block device '%s'",
-                         s->bitmap_name, s->node_name);
-            return -EINVAL;
+        if (!s->cancelled) {
+            s->bitmap = bdrv_find_dirty_bitmap(s->bs, s->bitmap_name);
+
+            /*
+             * bitmap may be NULL here, it wouldn't be an error if it is the
+             * first occurrence of the bitmap
+             */
+            if (!s->bitmap && !(s->flags & DIRTY_BITMAP_MIG_FLAG_START)) {
+                error_report("Error: unknown dirty bitmap "
+                             "'%s' for block device '%s'",
+                             s->bitmap_name, s->node_name);
+                cancel_incoming_locked(s);
+            }
         }
-    } else if (!s->bitmap && !nothing) {
+    } else if (!s->bitmap && !nothing && !s->cancelled) {
         error_report("Error: block device name is not set");
-        return -EINVAL;
+        cancel_incoming_locked(s);
     }
 
     return 0;
 }
 
+/*
+ * dirty_bitmap_load
+ *
+ * Load sequence of dirty bitmap chunks. Return error only on fatal io stream
+ * violations. On other errors just cancel bitmaps incoming migration and return
+ * 0.
+ *
+ * Note that when incoming bitmap migration is cancelled, we still must read all
+ * our chunks (and just ignore them), to not affect other migration objects.
+ */
 static int dirty_bitmap_load(QEMUFile *f, void *opaque, int version_id)
 {
     DBMLoadState *s = &((DBMState *)opaque)->load;
@@ -673,12 +740,19 @@ static int dirty_bitmap_load(QEMUFile *f, void *opaque, int version_id)
     trace_dirty_bitmap_load_enter();
 
     if (version_id != 1) {
+        qemu_mutex_lock(&s->lock);
+        cancel_incoming_locked(s);
+        qemu_mutex_unlock(&s->lock);
         return -EINVAL;
     }
 
     do {
+        qemu_mutex_lock(&s->lock);
+
         ret = dirty_bitmap_load_header(f, s);
         if (ret < 0) {
+            cancel_incoming_locked(s);
+            qemu_mutex_unlock(&s->lock);
             return ret;
         }
 
@@ -695,8 +769,12 @@ static int dirty_bitmap_load(QEMUFile *f, void *opaque, int version_id)
         }
 
         if (ret) {
+            cancel_incoming_locked(s);
+            qemu_mutex_unlock(&s->lock);
             return ret;
         }
+
+        qemu_mutex_unlock(&s->lock);
     } while (!(s->flags & DIRTY_BITMAP_MIG_FLAG_EOS));
 
     trace_dirty_bitmap_load_success();
-- 
2.21.0



  parent reply	other threads:[~2020-02-17 15:10 UTC|newest]

Thread overview: 80+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-02-17 15:02 [PATCH v2 00/22] Fix error handling during bitmap postcopy Vladimir Sementsov-Ogievskiy
2020-02-17 15:02 ` [PATCH v2 01/22] migration/block-dirty-bitmap: fix dirty_bitmap_mig_before_vm_start Vladimir Sementsov-Ogievskiy
2020-02-18  9:44   ` Andrey Shinkevich
2020-02-17 15:02 ` [PATCH v2 02/22] migration/block-dirty-bitmap: rename state structure types Vladimir Sementsov-Ogievskiy
2020-07-23 20:50   ` Eric Blake
2020-02-17 15:02 ` [PATCH v2 03/22] migration/block-dirty-bitmap: rename dirty_bitmap_mig_cleanup Vladimir Sementsov-Ogievskiy
2020-02-18 11:00   ` Andrey Shinkevich
2020-02-19 14:20     ` Vladimir Sementsov-Ogievskiy
2020-07-23 20:54       ` Eric Blake
2020-02-17 15:02 ` [PATCH v2 04/22] migration/block-dirty-bitmap: move mutex init to dirty_bitmap_mig_init Vladimir Sementsov-Ogievskiy
2020-02-18 11:28   ` Andrey Shinkevich
2020-02-17 15:02 ` [PATCH v2 05/22] migration/block-dirty-bitmap: refactor state global variables Vladimir Sementsov-Ogievskiy
2020-02-18 13:05   ` Andrey Shinkevich
2020-02-19 15:29     ` Vladimir Sementsov-Ogievskiy
2020-02-17 15:02 ` [PATCH v2 06/22] migration/block-dirty-bitmap: rename finish_lock to just lock Vladimir Sementsov-Ogievskiy
2020-02-18 13:20   ` Andrey Shinkevich
2020-02-17 15:02 ` [PATCH v2 07/22] migration/block-dirty-bitmap: simplify dirty_bitmap_load_complete Vladimir Sementsov-Ogievskiy
2020-02-18 14:26   ` Andrey Shinkevich
2020-02-19 15:30     ` Vladimir Sementsov-Ogievskiy
2020-02-19 16:14       ` Vladimir Sementsov-Ogievskiy
2020-02-17 15:02 ` [PATCH v2 08/22] migration/block-dirty-bitmap: keep bitmap state for all bitmaps Vladimir Sementsov-Ogievskiy
2020-02-18 17:07   ` Andrey Shinkevich
2020-07-23 21:30   ` Eric Blake
2020-07-24  5:18     ` Vladimir Sementsov-Ogievskiy
2020-02-17 15:02 ` Vladimir Sementsov-Ogievskiy [this message]
2020-02-18 18:54   ` [PATCH v2 09/22] migration/block-dirty-bitmap: relax error handling in incoming part Andrey Shinkevich
2020-02-19 15:34     ` Vladimir Sementsov-Ogievskiy
2020-07-24  7:23       ` Vladimir Sementsov-Ogievskiy
2020-02-17 15:02 ` [PATCH v2 10/22] migration/block-dirty-bitmap: cancel migration on shutdown Vladimir Sementsov-Ogievskiy
2020-02-18 19:11   ` Andrey Shinkevich
2020-07-23 21:04   ` Eric Blake
2020-02-17 15:02 ` [PATCH v2 11/22] migration/savevm: don't worry if bitmap migration postcopy failed Vladimir Sementsov-Ogievskiy
2020-02-17 16:57   ` Dr. David Alan Gilbert
2020-02-18 19:44   ` Andrey Shinkevich
2020-02-17 15:02 ` [PATCH v2 12/22] qemu-iotests/199: fix style Vladimir Sementsov-Ogievskiy
2020-02-19  7:04   ` Andrey Shinkevich
2020-07-23 22:03   ` Eric Blake
2020-07-24  6:32     ` Vladimir Sementsov-Ogievskiy
2020-02-17 15:02 ` [PATCH v2 13/22] qemu-iotests/199: drop extra constraints Vladimir Sementsov-Ogievskiy
2020-02-19  8:02   ` Andrey Shinkevich
2020-02-17 15:02 ` [PATCH v2 14/22] qemu-iotests/199: better catch postcopy time Vladimir Sementsov-Ogievskiy
2020-02-19 13:16   ` Andrey Shinkevich
2020-02-19 15:44     ` Vladimir Sementsov-Ogievskiy
2020-07-24  6:50     ` Vladimir Sementsov-Ogievskiy
2020-02-17 15:02 ` [PATCH v2 15/22] qemu-iotests/199: improve performance: set bitmap by discard Vladimir Sementsov-Ogievskiy
2020-02-19 14:17   ` Andrey Shinkevich
2020-02-17 15:02 ` [PATCH v2 16/22] qemu-iotests/199: change discard patterns Vladimir Sementsov-Ogievskiy
2020-02-19 14:33   ` Andrey Shinkevich
2020-02-19 14:44     ` Andrey Shinkevich
2020-02-19 15:46     ` Vladimir Sementsov-Ogievskiy
2020-07-24  0:23   ` Eric Blake
2020-02-17 15:02 ` [PATCH v2 17/22] qemu-iotests/199: increase postcopy period Vladimir Sementsov-Ogievskiy
2020-02-19 14:56   ` Andrey Shinkevich
2020-07-24  0:14   ` Eric Blake
2020-02-17 15:02 ` [PATCH v2 18/22] python/qemu/machine: add kill() method Vladimir Sementsov-Ogievskiy
2020-02-19 17:00   ` Andrey Shinkevich
2020-05-29 10:09   ` Philippe Mathieu-Daudé
2020-02-17 15:02 ` [PATCH v2 19/22] qemu-iotests/199: prepare for new test-cases addition Vladimir Sementsov-Ogievskiy
2020-02-19 16:10   ` Andrey Shinkevich
2020-02-17 15:02 ` [PATCH v2 20/22] qemu-iotests/199: check persistent bitmaps Vladimir Sementsov-Ogievskiy
2020-02-19 16:28   ` Andrey Shinkevich
2020-02-17 15:02 ` [PATCH v2 21/22] qemu-iotests/199: add early shutdown case to bitmaps postcopy Vladimir Sementsov-Ogievskiy
2020-02-19 16:48   ` Andrey Shinkevich
2020-02-19 16:50   ` Andrey Shinkevich
2020-02-17 15:02 ` [PATCH v2 22/22] qemu-iotests/199: add source-killed " Vladimir Sementsov-Ogievskiy
2020-02-19 17:15   ` Andrey Shinkevich
2020-07-24  7:50     ` Vladimir Sementsov-Ogievskiy
2020-02-17 19:31 ` [PATCH v2 00/22] Fix error handling during bitmap postcopy no-reply
2020-02-18 20:02 ` Andrey Shinkevich
2020-02-18 20:57   ` Eric Blake
2020-02-19 13:25     ` Andrey Shinkevich
2020-02-19 13:36       ` Eric Blake
2020-02-19 13:52         ` Andrey Shinkevich
2020-02-19 14:58           ` Eric Blake
2020-02-19 17:22             ` Andrey Shinkevich
2020-02-19 14:00         ` Eric Blake
2020-04-02  7:42 ` Vladimir Sementsov-Ogievskiy
2020-05-29 11:58   ` Eric Blake
2020-05-29 12:16     ` Vladimir Sementsov-Ogievskiy
2020-07-23 20:39       ` Eric Blake

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200217150246.29180-10-vsementsov@virtuozzo.com \
    --to=vsementsov@virtuozzo.com \
    --cc=andrey.shinkevich@virtuozzo.com \
    --cc=dgilbert@redhat.com \
    --cc=fam@euphon.net \
    --cc=jsnow@redhat.com \
    --cc=qemu-block@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    --cc=quintela@redhat.com \
    --cc=stefanha@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.