All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
To: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Cc: kwolf@redhat.com, fam@euphon.net, qemu-block@nongnu.org,
	quintela@redhat.com, qemu-devel@nongnu.org, mreitz@redhat.com,
	stefanha@redhat.com, andrey.shinkevich@virtuozzo.com,
	den@openvz.org, jsnow@redhat.com
Subject: Re: [PATCH v3 15/21] migration/block-dirty-bitmap: relax error handling in incoming part
Date: Mon, 27 Jul 2020 12:23:37 +0100	[thread overview]
Message-ID: <20200727112337.GK3040@work-vm> (raw)
In-Reply-To: <20200724084327.15665-16-vsementsov@virtuozzo.com>

* Vladimir Sementsov-Ogievskiy (vsementsov@virtuozzo.com) wrote:
> Bitmaps data is not critical, and we should not fail the migration (or
> use postcopy recovering) because of dirty-bitmaps migration failure.
> Instead we should just lose unfinished bitmaps.
> 
> Still we have to report io stream violation errors, as they affect the
> whole migration stream.
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
> ---
>  migration/block-dirty-bitmap.c | 152 +++++++++++++++++++++++++--------
>  1 file changed, 117 insertions(+), 35 deletions(-)
> 
> diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c
> index eb4ffeac4d..c24d4614bf 100644
> --- a/migration/block-dirty-bitmap.c
> +++ b/migration/block-dirty-bitmap.c
> @@ -145,6 +145,15 @@ typedef struct DBMLoadState {
>  
>      bool before_vm_start_handled; /* set in dirty_bitmap_mig_before_vm_start */
>  
> +    /*
> +     * cancelled
> +     * Incoming migration is cancelled for some reason. That means that we
> +     * still should read our chunks from migration stream, to not affect other
> +     * migration objects (like RAM), but just ignore them and do not touch any
> +     * bitmaps or nodes.
> +     */
> +    bool cancelled;
> +
>      GSList *bitmaps;
>      QemuMutex lock; /* protect bitmaps */
>  } DBMLoadState;
> @@ -531,6 +540,10 @@ static int dirty_bitmap_load_start(QEMUFile *f, DBMLoadState *s)
>      uint8_t flags = qemu_get_byte(f);
>      LoadBitmapState *b;
>  
> +    if (s->cancelled) {
> +        return 0;
> +    }
> +
>      if (s->bitmap) {
>          error_report("Bitmap with the same name ('%s') already exists on "
>                       "destination", bdrv_dirty_bitmap_name(s->bitmap));
> @@ -613,13 +626,47 @@ void dirty_bitmap_mig_before_vm_start(void)
>      qemu_mutex_unlock(&s->lock);
>  }
>  
> +static void cancel_incoming_locked(DBMLoadState *s)
> +{
> +    GSList *item;
> +
> +    if (s->cancelled) {
> +        return;
> +    }
> +
> +    s->cancelled = true;
> +    s->bs = NULL;
> +    s->bitmap = NULL;
> +
> +    /* Drop all unfinished bitmaps */
> +    for (item = s->bitmaps; item; item = g_slist_next(item)) {
> +        LoadBitmapState *b = item->data;
> +
> +        /*
> +         * Bitmap must be unfinished, as finished bitmaps should already be
> +         * removed from the list.
> +         */
> +        assert(!s->before_vm_start_handled || !b->migrated);
> +        if (bdrv_dirty_bitmap_has_successor(b->bitmap)) {
> +            bdrv_reclaim_dirty_bitmap(b->bitmap, &error_abort);
> +        }
> +        bdrv_release_dirty_bitmap(b->bitmap);
> +    }
> +
> +    g_slist_free_full(s->bitmaps, g_free);
> +    s->bitmaps = NULL;
> +}
> +
>  static void dirty_bitmap_load_complete(QEMUFile *f, DBMLoadState *s)
>  {
>      GSList *item;
>      trace_dirty_bitmap_load_complete();
> -    bdrv_dirty_bitmap_deserialize_finish(s->bitmap);
>  
> -    qemu_mutex_lock(&s->lock);
> +    if (s->cancelled) {
> +        return;
> +    }
> +
> +    bdrv_dirty_bitmap_deserialize_finish(s->bitmap);
>  
>      if (bdrv_dirty_bitmap_has_successor(s->bitmap)) {
>          bdrv_reclaim_dirty_bitmap(s->bitmap, &error_abort);
> @@ -637,8 +684,6 @@ static void dirty_bitmap_load_complete(QEMUFile *f, DBMLoadState *s)
>              break;
>          }
>      }
> -
> -    qemu_mutex_unlock(&s->lock);
>  }
>  
>  static int dirty_bitmap_load_bits(QEMUFile *f, DBMLoadState *s)
> @@ -650,15 +695,32 @@ static int dirty_bitmap_load_bits(QEMUFile *f, DBMLoadState *s)
>  
>      if (s->flags & DIRTY_BITMAP_MIG_FLAG_ZEROES) {
>          trace_dirty_bitmap_load_bits_zeroes();
> -        bdrv_dirty_bitmap_deserialize_zeroes(s->bitmap, first_byte, nr_bytes,
> -                                             false);
> +        if (!s->cancelled) {
> +            bdrv_dirty_bitmap_deserialize_zeroes(s->bitmap, first_byte,
> +                                                 nr_bytes, false);
> +        }
>      } else {
>          size_t ret;
>          uint8_t *buf;
>          uint64_t buf_size = qemu_get_be64(f);
> -        uint64_t needed_size =
> -            bdrv_dirty_bitmap_serialization_size(s->bitmap,
> -                                                 first_byte, nr_bytes);
> +        uint64_t needed_size;
> +
> +        buf = g_malloc(buf_size);
> +        ret = qemu_get_buffer(f, buf, buf_size);
> +        if (ret != buf_size) {
> +            error_report("Failed to read bitmap bits");
> +            g_free(buf);
> +            return -EIO;
> +        }
> +
> +        if (s->cancelled) {
> +            g_free(buf);
> +            return 0;
> +        }
> +
> +        needed_size = bdrv_dirty_bitmap_serialization_size(s->bitmap,
> +                                                           first_byte,
> +                                                           nr_bytes);
>  
>          if (needed_size > buf_size ||
>              buf_size > QEMU_ALIGN_UP(needed_size, 4 * sizeof(long))
> @@ -667,15 +729,8 @@ static int dirty_bitmap_load_bits(QEMUFile *f, DBMLoadState *s)
>              error_report("Migrated bitmap granularity doesn't "
>                           "match the destination bitmap '%s' granularity",
>                           bdrv_dirty_bitmap_name(s->bitmap));
> -            return -EINVAL;
> -        }
> -
> -        buf = g_malloc(buf_size);
> -        ret = qemu_get_buffer(f, buf, buf_size);
> -        if (ret != buf_size) {
> -            error_report("Failed to read bitmap bits");
> -            g_free(buf);
> -            return -EIO;
> +            cancel_incoming_locked(s);
> +            return 0;
>          }
>  
>          bdrv_dirty_bitmap_deserialize_part(s->bitmap, buf, first_byte, nr_bytes,
> @@ -700,14 +755,16 @@ static int dirty_bitmap_load_header(QEMUFile *f, DBMLoadState *s)
>              error_report("Unable to read node name string");
>              return -EINVAL;
>          }
> -        s->bs = bdrv_lookup_bs(s->node_name, s->node_name, &local_err);
> -        if (!s->bs) {
> -            error_report_err(local_err);
> -            return -EINVAL;
> +        if (!s->cancelled) {
> +            s->bs = bdrv_lookup_bs(s->node_name, s->node_name, &local_err);
> +            if (!s->bs) {
> +                error_report_err(local_err);
> +                cancel_incoming_locked(s);
> +            }
>          }
> -    } else if (!s->bs && !nothing) {
> +    } else if (!s->bs && !nothing && !s->cancelled) {
>          error_report("Error: block device name is not set");
> -        return -EINVAL;
> +        cancel_incoming_locked(s);
>      }
>  
>      if (s->flags & DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME) {
> @@ -715,24 +772,38 @@ static int dirty_bitmap_load_header(QEMUFile *f, DBMLoadState *s)
>              error_report("Unable to read bitmap name string");
>              return -EINVAL;
>          }
> -        s->bitmap = bdrv_find_dirty_bitmap(s->bs, s->bitmap_name);
> -
> -        /* bitmap may be NULL here, it wouldn't be an error if it is the
> -         * first occurrence of the bitmap */
> -        if (!s->bitmap && !(s->flags & DIRTY_BITMAP_MIG_FLAG_START)) {
> -            error_report("Error: unknown dirty bitmap "
> -                         "'%s' for block device '%s'",
> -                         s->bitmap_name, s->node_name);
> -            return -EINVAL;
> +        if (!s->cancelled) {
> +            s->bitmap = bdrv_find_dirty_bitmap(s->bs, s->bitmap_name);
> +
> +            /*
> +             * bitmap may be NULL here, it wouldn't be an error if it is the
> +             * first occurrence of the bitmap
> +             */
> +            if (!s->bitmap && !(s->flags & DIRTY_BITMAP_MIG_FLAG_START)) {
> +                error_report("Error: unknown dirty bitmap "
> +                             "'%s' for block device '%s'",
> +                             s->bitmap_name, s->node_name);
> +                cancel_incoming_locked(s);
> +            }
>          }
> -    } else if (!s->bitmap && !nothing) {
> +    } else if (!s->bitmap && !nothing && !s->cancelled) {
>          error_report("Error: block device name is not set");
> -        return -EINVAL;
> +        cancel_incoming_locked(s);
>      }
>  
>      return 0;
>  }
>  
> +/*
> + * dirty_bitmap_load
> + *
> + * Load sequence of dirty bitmap chunks. Return error only on fatal io stream
> + * violations. On other errors just cancel bitmaps incoming migration and return
> + * 0.
> + *
> + * Note, than when incoming bitmap migration is canceled, we still must read all
> + * our chunks (and just ignore them), to not affect other migration objects.
> + */
>  static int dirty_bitmap_load(QEMUFile *f, void *opaque, int version_id)
>  {
>      DBMLoadState *s = &((DBMState *)opaque)->load;
> @@ -741,12 +812,19 @@ static int dirty_bitmap_load(QEMUFile *f, void *opaque, int version_id)
>      trace_dirty_bitmap_load_enter();
>  
>      if (version_id != 1) {
> +        qemu_mutex_lock(&s->lock);
> +        cancel_incoming_locked(s);
> +        qemu_mutex_unlock(&s->lock);
>          return -EINVAL;
>      }
>  
>      do {
> +        qemu_mutex_lock(&s->lock);

Would QEMU_LOCK_GUARD(&s->lock)  work there?
It avoids the need to catch the unlock on each of the failure cases.

Dave

>          ret = dirty_bitmap_load_header(f, s);
>          if (ret < 0) {
> +            cancel_incoming_locked(s);
> +            qemu_mutex_unlock(&s->lock);
>              return ret;
>          }
>  
> @@ -763,8 +841,12 @@ static int dirty_bitmap_load(QEMUFile *f, void *opaque, int version_id)
>          }
>  
>          if (ret) {
> +            cancel_incoming_locked(s);
> +            qemu_mutex_unlock(&s->lock);
>              return ret;
>          }
> +
> +        qemu_mutex_unlock(&s->lock);
>      } while (!(s->flags & DIRTY_BITMAP_MIG_FLAG_EOS));
>  
>      trace_dirty_bitmap_load_success();
> -- 
> 2.21.0
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK



  parent reply	other threads:[~2020-07-27 11:27 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-07-24  8:43 [PATCH v3 for-5.1?? 00/21] Fix error handling during bitmap postcopy Vladimir Sementsov-Ogievskiy
2020-07-24  8:43 ` [PATCH v3 01/21] qemu-iotests/199: fix style Vladimir Sementsov-Ogievskiy
2020-07-24 15:03   ` Eric Blake
2020-07-24  8:43 ` [PATCH v3 02/21] qemu-iotests/199: drop extra constraints Vladimir Sementsov-Ogievskiy
2020-07-24  8:43 ` [PATCH v3 03/21] qemu-iotests/199: better catch postcopy time Vladimir Sementsov-Ogievskiy
2020-07-24  8:43 ` [PATCH v3 04/21] qemu-iotests/199: improve performance: set bitmap by discard Vladimir Sementsov-Ogievskiy
2020-07-24  8:43 ` [PATCH v3 05/21] qemu-iotests/199: change discard patterns Vladimir Sementsov-Ogievskiy
2020-07-24  8:43 ` [PATCH v3 06/21] qemu-iotests/199: increase postcopy period Vladimir Sementsov-Ogievskiy
2020-07-24  8:43 ` [PATCH v3 07/21] migration/block-dirty-bitmap: fix dirty_bitmap_mig_before_vm_start Vladimir Sementsov-Ogievskiy
2020-07-24 15:49   ` Eric Blake
2020-07-24  8:43 ` [PATCH v3 08/21] migration/block-dirty-bitmap: rename state structure types Vladimir Sementsov-Ogievskiy
2020-07-24  8:43 ` [PATCH v3 09/21] migration/block-dirty-bitmap: rename dirty_bitmap_mig_cleanup Vladimir Sementsov-Ogievskiy
2020-07-24  8:43 ` [PATCH v3 10/21] migration/block-dirty-bitmap: move mutex init to dirty_bitmap_mig_init Vladimir Sementsov-Ogievskiy
2020-07-27  9:51   ` Dr. David Alan Gilbert
2020-07-24  8:43 ` [PATCH v3 11/21] migration/block-dirty-bitmap: refactor state global variables Vladimir Sementsov-Ogievskiy
2020-07-24  8:43 ` [PATCH v3 12/21] migration/block-dirty-bitmap: rename finish_lock to just lock Vladimir Sementsov-Ogievskiy
2020-07-24  8:43 ` [PATCH v3 13/21] migration/block-dirty-bitmap: simplify dirty_bitmap_load_complete Vladimir Sementsov-Ogievskiy
2020-07-24 16:11   ` Eric Blake
2020-07-24  8:43 ` [PATCH v3 14/21] migration/block-dirty-bitmap: keep bitmap state for all bitmaps Vladimir Sementsov-Ogievskiy
2020-07-24  8:43 ` [PATCH v3 15/21] migration/block-dirty-bitmap: relax error handling in incoming part Vladimir Sementsov-Ogievskiy
2020-07-24 17:35   ` Eric Blake
2020-07-24 20:30     ` Vladimir Sementsov-Ogievskiy
2020-07-27 11:16       ` Dr. David Alan Gilbert
2020-07-27 11:26         ` Vladimir Sementsov-Ogievskiy
2020-07-27 11:23   ` Dr. David Alan Gilbert [this message]
2020-07-27 11:28     ` Vladimir Sementsov-Ogievskiy
2020-07-24  8:43 ` [PATCH v3 16/21] migration/block-dirty-bitmap: cancel migration on shutdown Vladimir Sementsov-Ogievskiy
2020-07-27 13:21   ` Dr. David Alan Gilbert
2020-07-27 17:06     ` Vladimir Sementsov-Ogievskiy
2020-07-27 19:27       ` Vladimir Sementsov-Ogievskiy
2020-07-24  8:43 ` [PATCH v3 17/21] migration/savevm: don't worry if bitmap migration postcopy failed Vladimir Sementsov-Ogievskiy
2020-07-24 18:08   ` Eric Blake
2020-07-27 13:29     ` Dr. David Alan Gilbert
2020-07-24  8:43 ` [PATCH v3 18/21] qemu-iotests/199: prepare for new test-cases addition Vladimir Sementsov-Ogievskiy
2020-07-24  8:43 ` [PATCH v3 19/21] qemu-iotests/199: check persistent bitmaps Vladimir Sementsov-Ogievskiy
2020-07-24  8:43 ` [PATCH v3 20/21] qemu-iotests/199: add early shutdown case to bitmaps postcopy Vladimir Sementsov-Ogievskiy
2020-07-24 19:07   ` Eric Blake
2020-07-24  8:43 ` [PATCH v3 21/21] qemu-iotests/199: add source-killed " Vladimir Sementsov-Ogievskiy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200727112337.GK3040@work-vm \
    --to=dgilbert@redhat.com \
    --cc=andrey.shinkevich@virtuozzo.com \
    --cc=den@openvz.org \
    --cc=fam@euphon.net \
    --cc=jsnow@redhat.com \
    --cc=kwolf@redhat.com \
    --cc=mreitz@redhat.com \
    --cc=qemu-block@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    --cc=quintela@redhat.com \
    --cc=stefanha@redhat.com \
    --cc=vsementsov@virtuozzo.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.