qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH] migration: avoid starting a new migration task while the previous one still exist
@ 2013-11-04 11:26 Zhanghaoyu (A)
  2013-11-04 11:30 ` Paolo Bonzini
  2013-11-04 13:59 ` Eric Blake
  0 siblings, 2 replies; 7+ messages in thread
From: Zhanghaoyu (A) @ 2013-11-04 11:26 UTC (permalink / raw)
  To: qemu-devel@nongnu.org, Gleb Natapov, Michael S. Tsirkin,
	paolo.bonzini@gmail.com, Marcelo Tosatti, Eric Blake
  Cc: Huangweidong (C), Luonengjun, Wangrui (K)

Avoid starting a new migration task while the previous one still exist.

Signed-off-by: Zeng Junliang <zengjunliang@huawei.com>
---
 migration.c |   34 ++++++++++++++++++++++------------
 1 files changed, 22 insertions(+), 12 deletions(-)

diff --git a/migration.c b/migration.c
index 2b1ab20..ab4c439 100644
--- a/migration.c
+++ b/migration.c
@@ -40,8 +40,10 @@ enum {
     MIG_STATE_ERROR = -1,
     MIG_STATE_NONE,
     MIG_STATE_SETUP,
+    MIG_STATE_CANCELLING,
     MIG_STATE_CANCELLED,
     MIG_STATE_ACTIVE,
+    MIG_STATE_COMPLETING,
     MIG_STATE_COMPLETED,
 };
 
@@ -196,6 +198,8 @@ MigrationInfo *qmp_query_migrate(Error **errp)
         info->has_total_time = false;
         break;
     case MIG_STATE_ACTIVE:
+    case MIG_STATE_CANCELLING:
+    case MIG_STATE_COMPLETING:
         info->has_status = true;
         info->status = g_strdup("active");
         info->has_total_time = true;
@@ -282,6 +286,13 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
 
 /* shared migration helpers */
 
+static void migrate_set_state(MigrationState *s, int old_state, int new_state)
+{
+    if (atomic_cmpxchg(&s->state, old_state, new_state) == new_state) {
+        trace_migrate_set_state(new_state);
+    }
+}
+
 static void migrate_fd_cleanup(void *opaque)
 {
     MigrationState *s = opaque;
@@ -301,20 +312,18 @@ static void migrate_fd_cleanup(void *opaque)
 
     assert(s->state != MIG_STATE_ACTIVE);
 
-    if (s->state != MIG_STATE_COMPLETED) {
+    if (s->state != MIG_STATE_COMPLETING) {
         qemu_savevm_state_cancel();
+        if (s->state == MIG_STATE_CANCELLING) {
+            migrate_set_state(s, MIG_STATE_CANCELLING, MIG_STATE_CANCELLED); 
+        }
+    }else {
+        migrate_set_state(s, MIG_STATE_COMPLETING, MIG_STATE_COMPLETED); 
     }
 
     notifier_list_notify(&migration_state_notifiers, s);
 }
 
-static void migrate_set_state(MigrationState *s, int old_state, int new_state)
-{
-    if (atomic_cmpxchg(&s->state, old_state, new_state) == new_state) {
-        trace_migrate_set_state(new_state);
-    }
-}
-
 void migrate_fd_error(MigrationState *s)
 {
     DPRINTF("setting error state\n");
@@ -328,7 +337,7 @@ static void migrate_fd_cancel(MigrationState *s)
 {
     DPRINTF("cancelling migration\n");
 
-    migrate_set_state(s, s->state, MIG_STATE_CANCELLED);
+    migrate_set_state(s, s->state, MIG_STATE_CANCELLING);
 }
 
 void add_migration_state_change_notifier(Notifier *notify)
@@ -405,7 +414,8 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
     params.blk = has_blk && blk;
     params.shared = has_inc && inc;
 
-    if (s->state == MIG_STATE_ACTIVE || s->state == MIG_STATE_SETUP) {
+    if (s->state == MIG_STATE_ACTIVE || s->state == MIG_STATE_SETUP ||
+        s->state == MIG_STATE_COMPLETING || s->state == MIG_STATE_CANCELLING) {
         error_set(errp, QERR_MIGRATION_ACTIVE);
         return;
     }
@@ -594,7 +604,7 @@ static void *migration_thread(void *opaque)
                 }
 
                 if (!qemu_file_get_error(s->file)) {
-                    migrate_set_state(s, MIG_STATE_ACTIVE, MIG_STATE_COMPLETED);
+                    migrate_set_state(s, MIG_STATE_ACTIVE, MIG_STATE_COMPLETING);
                     break;
                 }
             }
@@ -634,7 +644,7 @@ static void *migration_thread(void *opaque)
     }
 
     qemu_mutex_lock_iothread();
-    if (s->state == MIG_STATE_COMPLETED) {
+    if (s->state == MIG_STATE_COMPLETING) {
         int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
         s->total_time = end_time - s->total_time;
         s->downtime = end_time - start_time;
-- 
1.7.3.1.msysgit.0

BTW, while error happened during migration, need the "erroring" state to avoid starting a new migration task while current migration task still exist?
And, do the new added migration states need to be reported to libvirt?

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [Qemu-devel] [PATCH] migration: avoid starting a new migration task while the previous one still exist
  2013-11-04 11:26 [Qemu-devel] [PATCH] migration: avoid starting a new migration task while the previous one still exist Zhanghaoyu (A)
@ 2013-11-04 11:30 ` Paolo Bonzini
  2013-11-05  2:23   ` Zhanghaoyu (A)
  2013-11-04 13:59 ` Eric Blake
  1 sibling, 1 reply; 7+ messages in thread
From: Paolo Bonzini @ 2013-11-04 11:30 UTC (permalink / raw)
  To: Zhanghaoyu (A)
  Cc: Huangweidong (C), Gleb Natapov, Michael S. Tsirkin,
	Marcelo Tosatti, Luonengjun, qemu-devel@nongnu.org, Wangrui (K),
	Paolo Bonzini

Il 04/11/2013 12:26, Zhanghaoyu (A) ha scritto:
> Avoid starting a new migration task while the previous one still exist.

Can you explain how to reproduce the problem?

Also please use pbonzini@redhat.com instead.  My Gmail address is an
implementation detail. :)

> Signed-off-by: Zeng Junliang <zengjunliang@huawei.com>

It looks like the author of the patch is not the same as you.  If so,
you need to make Zeng Junliang the author (using --author on the "git
commit" command line) and add your own signoff line.

Paolo

> ---
>  migration.c |   34 ++++++++++++++++++++++------------
>  1 files changed, 22 insertions(+), 12 deletions(-)
> 
> diff --git a/migration.c b/migration.c
> index 2b1ab20..ab4c439 100644
> --- a/migration.c
> +++ b/migration.c
> @@ -40,8 +40,10 @@ enum {
>      MIG_STATE_ERROR = -1,
>      MIG_STATE_NONE,
>      MIG_STATE_SETUP,
> +    MIG_STATE_CANCELLING,
>      MIG_STATE_CANCELLED,
>      MIG_STATE_ACTIVE,
> +    MIG_STATE_COMPLETING,
>      MIG_STATE_COMPLETED,
>  };
>  
> @@ -196,6 +198,8 @@ MigrationInfo *qmp_query_migrate(Error **errp)
>          info->has_total_time = false;
>          break;
>      case MIG_STATE_ACTIVE:
> +    case MIG_STATE_CANCELLING:
> +    case MIG_STATE_COMPLETING:
>          info->has_status = true;
>          info->status = g_strdup("active");
>          info->has_total_time = true;
> @@ -282,6 +286,13 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
>  
>  /* shared migration helpers */
>  
> +static void migrate_set_state(MigrationState *s, int old_state, int new_state)
> +{
> +    if (atomic_cmpxchg(&s->state, old_state, new_state) == new_state) {
> +        trace_migrate_set_state(new_state);
> +    }
> +}
> +
>  static void migrate_fd_cleanup(void *opaque)
>  {
>      MigrationState *s = opaque;
> @@ -301,20 +312,18 @@ static void migrate_fd_cleanup(void *opaque)
>  
>      assert(s->state != MIG_STATE_ACTIVE);
>  
> -    if (s->state != MIG_STATE_COMPLETED) {
> +    if (s->state != MIG_STATE_COMPLETING) {
>          qemu_savevm_state_cancel();
> +        if (s->state == MIG_STATE_CANCELLING) {
> +            migrate_set_state(s, MIG_STATE_CANCELLING, MIG_STATE_CANCELLED); 
> +        }
> +    }else {
> +        migrate_set_state(s, MIG_STATE_COMPLETING, MIG_STATE_COMPLETED); 
>      }
>  
>      notifier_list_notify(&migration_state_notifiers, s);
>  }
>  
> -static void migrate_set_state(MigrationState *s, int old_state, int new_state)
> -{
> -    if (atomic_cmpxchg(&s->state, old_state, new_state) == new_state) {
> -        trace_migrate_set_state(new_state);
> -    }
> -}
> -
>  void migrate_fd_error(MigrationState *s)
>  {
>      DPRINTF("setting error state\n");
> @@ -328,7 +337,7 @@ static void migrate_fd_cancel(MigrationState *s)
>  {
>      DPRINTF("cancelling migration\n");
>  
> -    migrate_set_state(s, s->state, MIG_STATE_CANCELLED);
> +    migrate_set_state(s, s->state, MIG_STATE_CANCELLING);
>  }
>  
>  void add_migration_state_change_notifier(Notifier *notify)
> @@ -405,7 +414,8 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
>      params.blk = has_blk && blk;
>      params.shared = has_inc && inc;
>  
> -    if (s->state == MIG_STATE_ACTIVE || s->state == MIG_STATE_SETUP) {
> +    if (s->state == MIG_STATE_ACTIVE || s->state == MIG_STATE_SETUP ||
> +        s->state == MIG_STATE_COMPLETING || s->state == MIG_STATE_CANCELLING) {
>          error_set(errp, QERR_MIGRATION_ACTIVE);
>          return;
>      }
> @@ -594,7 +604,7 @@ static void *migration_thread(void *opaque)
>                  }
>  
>                  if (!qemu_file_get_error(s->file)) {
> -                    migrate_set_state(s, MIG_STATE_ACTIVE, MIG_STATE_COMPLETED);
> +                    migrate_set_state(s, MIG_STATE_ACTIVE, MIG_STATE_COMPLETING);
>                      break;
>                  }
>              }
> @@ -634,7 +644,7 @@ static void *migration_thread(void *opaque)
>      }
>  
>      qemu_mutex_lock_iothread();
> -    if (s->state == MIG_STATE_COMPLETED) {
> +    if (s->state == MIG_STATE_COMPLETING) {
>          int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
>          s->total_time = end_time - s->total_time;
>          s->downtime = end_time - start_time;
> 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Qemu-devel] [PATCH] migration: avoid starting a new migration task while the previous one still exist
  2013-11-04 11:26 [Qemu-devel] [PATCH] migration: avoid starting a new migration task while the previous one still exist Zhanghaoyu (A)
  2013-11-04 11:30 ` Paolo Bonzini
@ 2013-11-04 13:59 ` Eric Blake
  1 sibling, 0 replies; 7+ messages in thread
From: Eric Blake @ 2013-11-04 13:59 UTC (permalink / raw)
  To: Zhanghaoyu (A), qemu-devel@nongnu.org, Gleb Natapov,
	Michael S. Tsirkin, paolo.bonzini@gmail.com, Marcelo Tosatti
  Cc: Huangweidong (C), Luonengjun, Wangrui (K)

[-- Attachment #1: Type: text/plain, Size: 1440 bytes --]

On 11/04/2013 04:26 AM, Zhanghaoyu (A) wrote:
> Avoid starting a new migration task while the previous one still exist.
> 
> Signed-off-by: Zeng Junliang <zengjunliang@huawei.com>
> ---

It's best to put comments here...

>  migration.c |   34 ++++++++++++++++++++++------------
>  1 files changed, 22 insertions(+), 12 deletions(-)
...

>          s->downtime = end_time - start_time;
> -- 1.7.3.1.msysgit.0 BTW, while error happened during migration, need
> the "erroring" state to avoid starting a new migration task while
> current migration task still exist? And, do the new added migration
> states need to be reported to libvirt?

...rather than here, since most mail clients will strip anything after
the '-- ' separator output by git as if it were the signature, making it
very difficult to reply to (as you can see, my mailer corrupted the
formatting of your question due to how I forced it to reply to your
signature).

As to whether libvirt needs to know about the state, I'm not sure.
Libvirt already has its own code to prevent concurrent attempts at
migration, so theoretically libvirt should never trip the situation that
you appear to be coding for.  Perhaps providing more details in the
commit message about how to actually get into the situation will make it
easier to evaluate.

-- 
Eric Blake   eblake redhat com    +1-919-301-3266
Libvirt virtualization library http://libvirt.org


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 621 bytes --]

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Qemu-devel] [PATCH] migration: avoid starting a new migration task while the previous one still exist
  2013-11-04 11:30 ` Paolo Bonzini
@ 2013-11-05  2:23   ` Zhanghaoyu (A)
  2013-11-05  9:15     ` Paolo Bonzini
  0 siblings, 1 reply; 7+ messages in thread
From: Zhanghaoyu (A) @ 2013-11-05  2:23 UTC (permalink / raw)
  To: pbonzini@redhat.com, Eric Blake
  Cc: Huangweidong (C), Gleb Natapov, Michael S. Tsirkin,
	Marcelo Tosatti, Luonengjun, qemu-devel@nongnu.org, Wangrui (K)

> > Avoid starting a new migration task while the previous one still
> exist.
> 
> Can you explain how to reproduce the problem?
> 
When network disconnection between source and destination happened, the migration thread stuck at below stack,
#0  0x00007f07e96c8288 in writev () from /lib64/libc.so.6
#1  0x00007f07eb9bf11d in unix_writev_buffer (opaque=0x7f07eca2de80, iov=0x7f07ede9b1e0, iovcnt=64,
    pos=259870577) at /mnt/sdb2/zjl/bsod0x101_0821/qemu-kvm-1.5.1/savevm.c:354
#2  0x00007f07eb9bf999 in qemu_fflush (f=0x7f07ede931b0)
    at /mnt/sdb2/zjl/bsod0x101_0821/qemu-kvm-1.5.1/savevm.c:600
#3  0x00007f07eb9c011f in add_to_iovec (f=0x7f07ede931b0, buf=0x7f000ee23000 "", size=4096)
    at /mnt/sdb2/zjl/bsod0x101_0821/qemu-kvm-1.5.1/savevm.c:756
#4  0x00007f07eb9c01c0 in qemu_put_buffer_async (f=0x7f07ede931b0, buf=0x7f000ee23000 "", size=4096)
    at /mnt/sdb2/zjl/bsod0x101_0821/qemu-kvm-1.5.1/savevm.c:772
#5  0x00007f07eb92ad2f in ram_save_block (f=0x7f07ede931b0, last_stage=false)
    at /mnt/sdb2/zjl/bsod0x101_0821/qemu-kvm-1.5.1/arch_init.c:493
#6  0x00007f07eb92b30c in ram_save_iterate (f=0x7f07ede931b0, opaque=0x0)
    at /mnt/sdb2/zjl/bsod0x101_0821/qemu-kvm-1.5.1/arch_init.c:654
#7  0x00007f07eb9c2e12 in qemu_savevm_state_iterate (f=0x7f07ede931b0)
    at /mnt/sdb2/zjl/bsod0x101_0821/qemu-kvm-1.5.1/savevm.c:1914
#8  0x00007f07eb8975e1 in migration_thread (opaque=0x7f07ebf53300 <current_migration.25325>)
    at migration.c:578
Then I cancel the migration task, the migration state in qemu will be set to MIG_STATE_CANCELLED, so the migration job in libvirt quits.
Then I perform migration again, at this time, the network reconnected successfully,
since the TCP timeout retransmission, above stack will not return immediately, so two migration tasks exist at the same time.
And still worse, source qemu will crash, because of accessing the NULL pointer in qemu_bh_schedule(s->cleanup_bh); statement in latter migration task, 
since the "s->cleanup_bh" had been deleted by previous migration task.

> Also please use pbonzini@redhat.com instead.  My Gmail address is an
> implementation detail. :)
> 
> > Signed-off-by: Zeng Junliang <zengjunliang@huawei.com>
> 
> It looks like the author of the patch is not the same as you.  If so,
> you need to make Zeng Junliang the author (using --author on the "git
> commit" command line) and add your own signoff line.
So sorry for my poor experience.

> 
> Paolo
> 

Avoid starting a new migration task while the previous one still exist.

Signed-off-by: Zeng Junliang <zengjunliang@huawei.com>
Signed-off-by: Zhang Haoyu <haoyu.zhang@huawei.com>
---
 migration.c |   34 ++++++++++++++++++++++------------
 1 files changed, 22 insertions(+), 12 deletions(-)

diff --git a/migration.c b/migration.c
index 2b1ab20..ab4c439 100644
--- a/migration.c
+++ b/migration.c
@@ -40,8 +40,10 @@ enum {
     MIG_STATE_ERROR = -1,
     MIG_STATE_NONE,
     MIG_STATE_SETUP,
+    MIG_STATE_CANCELLING,
     MIG_STATE_CANCELLED,
     MIG_STATE_ACTIVE,
+    MIG_STATE_COMPLETING,
     MIG_STATE_COMPLETED,
 };
 
@@ -196,6 +198,8 @@ MigrationInfo *qmp_query_migrate(Error **errp)
         info->has_total_time = false;
         break;
     case MIG_STATE_ACTIVE:
+    case MIG_STATE_CANCELLING:
+    case MIG_STATE_COMPLETING:
         info->has_status = true;
         info->status = g_strdup("active");
         info->has_total_time = true;
@@ -282,6 +286,13 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
 
 /* shared migration helpers */
 
+static void migrate_set_state(MigrationState *s, int old_state, int new_state)
+{
+    if (atomic_cmpxchg(&s->state, old_state, new_state) == new_state) {
+        trace_migrate_set_state(new_state);
+    }
+}
+
 static void migrate_fd_cleanup(void *opaque)
 {
     MigrationState *s = opaque;
@@ -301,20 +312,18 @@ static void migrate_fd_cleanup(void *opaque)
 
     assert(s->state != MIG_STATE_ACTIVE);
 
-    if (s->state != MIG_STATE_COMPLETED) {
+    if (s->state != MIG_STATE_COMPLETING) {
         qemu_savevm_state_cancel();
+        if (s->state == MIG_STATE_CANCELLING) {
+            migrate_set_state(s, MIG_STATE_CANCELLING, MIG_STATE_CANCELLED); 
+        }
+    }else {
+        migrate_set_state(s, MIG_STATE_COMPLETING, MIG_STATE_COMPLETED); 
     }
 
     notifier_list_notify(&migration_state_notifiers, s);
 }
 
-static void migrate_set_state(MigrationState *s, int old_state, int new_state)
-{
-    if (atomic_cmpxchg(&s->state, old_state, new_state) == new_state) {
-        trace_migrate_set_state(new_state);
-    }
-}
-
 void migrate_fd_error(MigrationState *s)
 {
     DPRINTF("setting error state\n");
@@ -328,7 +337,7 @@ static void migrate_fd_cancel(MigrationState *s)
 {
     DPRINTF("cancelling migration\n");
 
-    migrate_set_state(s, s->state, MIG_STATE_CANCELLED);
+    migrate_set_state(s, s->state, MIG_STATE_CANCELLING);
 }
 
 void add_migration_state_change_notifier(Notifier *notify)
@@ -405,7 +414,8 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
     params.blk = has_blk && blk;
     params.shared = has_inc && inc;
 
-    if (s->state == MIG_STATE_ACTIVE || s->state == MIG_STATE_SETUP) {
+    if (s->state == MIG_STATE_ACTIVE || s->state == MIG_STATE_SETUP ||
+        s->state == MIG_STATE_COMPLETING || s->state == MIG_STATE_CANCELLING) {
         error_set(errp, QERR_MIGRATION_ACTIVE);
         return;
     }
@@ -594,7 +604,7 @@ static void *migration_thread(void *opaque)
                 }
 
                 if (!qemu_file_get_error(s->file)) {
-                    migrate_set_state(s, MIG_STATE_ACTIVE, MIG_STATE_COMPLETED);
+                    migrate_set_state(s, MIG_STATE_ACTIVE, MIG_STATE_COMPLETING);
                     break;
                 }
             }
@@ -634,7 +644,7 @@ static void *migration_thread(void *opaque)
     }
 
     qemu_mutex_lock_iothread();
-    if (s->state == MIG_STATE_COMPLETED) {
+    if (s->state == MIG_STATE_COMPLETING) {
         int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
         s->total_time = end_time - s->total_time;
         s->downtime = end_time - start_time;
-- 
1.7.3.1.msysgit.0

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [Qemu-devel] [PATCH] migration: avoid starting a new migration task while the previous one still exist
  2013-11-05  2:23   ` Zhanghaoyu (A)
@ 2013-11-05  9:15     ` Paolo Bonzini
  2013-11-06  1:50       ` Zhanghaoyu (A)
  0 siblings, 1 reply; 7+ messages in thread
From: Paolo Bonzini @ 2013-11-05  9:15 UTC (permalink / raw)
  To: Zhanghaoyu (A)
  Cc: Huangweidong (C), Gleb Natapov, Michael S. Tsirkin,
	Marcelo Tosatti, Luonengjun, qemu-devel@nongnu.org, Wangrui (K)

Il 05/11/2013 03:23, Zhanghaoyu (A) ha scritto:
>>> Avoid starting a new migration task while the previous one still
>> exist.
>>
>> Can you explain how to reproduce the problem?
>>
> When network disconnection between source and destination happened, the migration thread stuck at below stack,
> #0  0x00007f07e96c8288 in writev () from /lib64/libc.so.6
> #1  0x00007f07eb9bf11d in unix_writev_buffer (opaque=0x7f07eca2de80, iov=0x7f07ede9b1e0, iovcnt=64,
>     pos=259870577) at /mnt/sdb2/zjl/bsod0x101_0821/qemu-kvm-1.5.1/savevm.c:354
> #2  0x00007f07eb9bf999 in qemu_fflush (f=0x7f07ede931b0)
>     at /mnt/sdb2/zjl/bsod0x101_0821/qemu-kvm-1.5.1/savevm.c:600
> #3  0x00007f07eb9c011f in add_to_iovec (f=0x7f07ede931b0, buf=0x7f000ee23000 "", size=4096)
>     at /mnt/sdb2/zjl/bsod0x101_0821/qemu-kvm-1.5.1/savevm.c:756
> #4  0x00007f07eb9c01c0 in qemu_put_buffer_async (f=0x7f07ede931b0, buf=0x7f000ee23000 "", size=4096)
>     at /mnt/sdb2/zjl/bsod0x101_0821/qemu-kvm-1.5.1/savevm.c:772
> #5  0x00007f07eb92ad2f in ram_save_block (f=0x7f07ede931b0, last_stage=false)
>     at /mnt/sdb2/zjl/bsod0x101_0821/qemu-kvm-1.5.1/arch_init.c:493
> #6  0x00007f07eb92b30c in ram_save_iterate (f=0x7f07ede931b0, opaque=0x0)
>     at /mnt/sdb2/zjl/bsod0x101_0821/qemu-kvm-1.5.1/arch_init.c:654
> #7  0x00007f07eb9c2e12 in qemu_savevm_state_iterate (f=0x7f07ede931b0)
>     at /mnt/sdb2/zjl/bsod0x101_0821/qemu-kvm-1.5.1/savevm.c:1914
> #8  0x00007f07eb8975e1 in migration_thread (opaque=0x7f07ebf53300 <current_migration.25325>)
>     at migration.c:578
> Then I cancel the migration task, the migration state in qemu will be set to MIG_STATE_CANCELLED, so the migration job in libvirt quits.
> Then I perform migration again, at this time, the network reconnected successfully,
> since the TCP timeout retransmission, above stack will not return immediately, so two migration tasks exist at the same time.
> And still worse, source qemu will crash, because of accessing the NULL pointer in qemu_bh_schedule(s->cleanup_bh); statement in latter migration task, 
> since the "s->cleanup_bh" had been deleted by previous migration task.

Thanks for explaining.  CANCELLING looks like a useful addition.

Why do you need both CANCELLING and COMPLETING?  The COMPLETED state
should be set only after all I/O is done.

I agree with Eric that the CANCELLING state should not be exposed via QMP.
"info migrate" and "query-migrate" can keep showing "active" for maximum
backwards compatibility.

More comments below.


> -    if (s->state != MIG_STATE_COMPLETED) {
> +    if (s->state != MIG_STATE_COMPLETING) {
>          qemu_savevm_state_cancel();
> +        if (s->state == MIG_STATE_CANCELLING) {
> +            migrate_set_state(s, MIG_STATE_CANCELLING, MIG_STATE_CANCELLED); 
> +        }

I think you can remove the "if" and unconditionally call migrate_set_state.

> +    }else {
> +        migrate_set_state(s, MIG_STATE_COMPLETING, MIG_STATE_COMPLETED); 
>      }
>  
>      notifier_list_notify(&migration_state_notifiers, s);
>  }
>  
> -static void migrate_set_state(MigrationState *s, int old_state, int new_state)
> -{
> -    if (atomic_cmpxchg(&s->state, old_state, new_state) == new_state) {
> -        trace_migrate_set_state(new_state);
> -    }
> -}
> -
>  void migrate_fd_error(MigrationState *s)
>  {
>      DPRINTF("setting error state\n");
> @@ -328,7 +337,7 @@ static void migrate_fd_cancel(MigrationState *s)
>  {
>      DPRINTF("cancelling migration\n");
>  
> -    migrate_set_state(s, s->state, MIG_STATE_CANCELLED);
> +    migrate_set_state(s, s->state, MIG_STATE_CANCELLING);

Here probably we want something like

    do {
        old_state = s->state;
        if (old_state != MIG_STATE_SETUP && old_state != MIG_STATE_ACTIVE) {
            break;
        }
        migrate_set_state(s, old_state, MIG_STATE_CANCELLING);
    } while (s->state != MIG_STATE_CANCELLING);

to avoid a bogus COMPLETED->CANCELLED transition.  Please separate the patch in
two parts:

(1) the first uses the above code, with CANCELLED instead of CANCELLING

(2) the second, similar to the one you have posted, introduces the new CANCELLING
state

Thanks,

Paolo

>  }
>  
>  void add_migration_state_change_notifier(Notifier *notify)
> @@ -405,7 +414,8 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
>      params.blk = has_blk && blk;
>      params.shared = has_inc && inc;
>  
> -    if (s->state == MIG_STATE_ACTIVE || s->state == MIG_STATE_SETUP) {
> +    if (s->state == MIG_STATE_ACTIVE || s->state == MIG_STATE_SETUP ||
> +        s->state == MIG_STATE_COMPLETING || s->state == MIG_STATE_CANCELLING) {
>          error_set(errp, QERR_MIGRATION_ACTIVE);
>          return;
>      }
> @@ -594,7 +604,7 @@ static void *migration_thread(void *opaque)
>                  }
>  
>                  if (!qemu_file_get_error(s->file)) {
> -                    migrate_set_state(s, MIG_STATE_ACTIVE, MIG_STATE_COMPLETED);
> +                    migrate_set_state(s, MIG_STATE_ACTIVE, MIG_STATE_COMPLETING);
>                      break;
>                  }
>              }
> @@ -634,7 +644,7 @@ static void *migration_thread(void *opaque)
>      }
>  
>      qemu_mutex_lock_iothread();
> -    if (s->state == MIG_STATE_COMPLETED) {
> +    if (s->state == MIG_STATE_COMPLETING) {
>          int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
>          s->total_time = end_time - s->total_time;
>          s->downtime = end_time - start_time;
> 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Qemu-devel] [PATCH] migration: avoid starting a new migration task while the previous one still exist
  2013-11-05  9:15     ` Paolo Bonzini
@ 2013-11-06  1:50       ` Zhanghaoyu (A)
  2013-11-06  9:04         ` Paolo Bonzini
  0 siblings, 1 reply; 7+ messages in thread
From: Zhanghaoyu (A) @ 2013-11-06  1:50 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Huangweidong (C), Gleb Natapov, Michael S. Tsirkin,
	Marcelo Tosatti, Luonengjun, qemu-devel@nongnu.org, Zengjunliang,
	Wangrui (K)

>>>> Avoid starting a new migration task while the previous one still
>>> exist.
>>>
>>> Can you explain how to reproduce the problem?
>>>
>> When network disconnection between source and destination happened, 
>> the migration thread stuck at below stack,
>> Then I cancel the migration task, the migration state in qemu will be set to MIG_STATE_CANCELLED, so the migration job in libvirt quits.
>> Then I perform migration again, at this time, the network reconnected 
>> successfully, since the TCP timeout retransmission, above stack will not return immediately, so two migration tasks exist at the same time.
>> And still worse, source qemu will crash, because of accessing the NULL 
>> pointer in qemu_bh_schedule(s->cleanup_bh); statement in latter migration task, since the "s->cleanup_bh" had been deleted by previous migration task.
>
>Thanks for explaining.  CANCELLING looks like a useful addition.
>
>Why do you need both CANCELLING and COMPLETING?  The COMPLETED state should be set only after all I/O is done.
>
There is a period of time from the timing of setting COMPLETED state to that of migration task exits,
so it's problematic in COMPLETED->CANCELLED transition, but if applying your below proposal, the problem gone.
do {
    old_state = s->state;
    if (old_state != MIG_STATE_SETUP && old_state != MIG_STATE_ACTIVE) {
        break;
    }
    migrate_set_state(s, old_state, MIG_STATE_CANCELLED);
} while (s->state != MIG_STATE_CANCELLED);

>I agree with Eric that the CANCELLING state should not be exposed via QMP.
>"info migrate" and "query-migrate" can keep showing "active" for maximum backwards compatibility.
>
>More comments below.
>
>
>> -    if (s->state != MIG_STATE_COMPLETED) {
>> +    if (s->state != MIG_STATE_COMPLETING) {
>>          qemu_savevm_state_cancel();
>> +        if (s->state == MIG_STATE_CANCELLING) {
>> +            migrate_set_state(s, MIG_STATE_CANCELLING, MIG_STATE_CANCELLED); 
>> +        }
>
>I think you can remove the "if" and unconditionally call migrate_set_state.

Do you mean to remove the "if (s->state == MIG_STATE_CANCELLING)" ?
The s->state probably is MIG_STATE_ERROR here, is it okay to unconditionally call migrate_set_state?

Thanks,
Zhang Haoyu

>
>> +    }else {
>> +        migrate_set_state(s, MIG_STATE_COMPLETING, 
>> + MIG_STATE_COMPLETED);
>>      }
>>  
>>      notifier_list_notify(&migration_state_notifiers, s);  }
>>  
>> -static void migrate_set_state(MigrationState *s, int old_state, int 
>> new_state) -{
>> -    if (atomic_cmpxchg(&s->state, old_state, new_state) == new_state) {
>> -        trace_migrate_set_state(new_state);
>> -    }
>> -}
>> -
>>  void migrate_fd_error(MigrationState *s)  {
>>      DPRINTF("setting error state\n"); @@ -328,7 +337,7 @@ static void 
>> migrate_fd_cancel(MigrationState *s)  {
>>      DPRINTF("cancelling migration\n");
>>  
>> -    migrate_set_state(s, s->state, MIG_STATE_CANCELLED);
>> +    migrate_set_state(s, s->state, MIG_STATE_CANCELLING);
>
>Here probably we want something like
>
>    do {
>        old_state = s->state;
>        if (old_state != MIG_STATE_SETUP && old_state != MIG_STATE_ACTIVE) {
>            break;
>        }
>        migrate_set_state(s, old_state, MIG_STATE_CANCELLING);
>    } while (s->state != MIG_STATE_CANCELLING);
>
>to avoid a bogus COMPLETED->CANCELLED transition.  Please separate the patch in two parts:
>
>(1) the first uses the above code, with CANCELLED instead of CANCELLING
>
>(2) the second, similar to the one you have posted, introduces the new CANCELLING state
>
>Thanks,
>
>Paolo

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Qemu-devel] [PATCH] migration: avoid starting a new migration task while the previous one still exist
  2013-11-06  1:50       ` Zhanghaoyu (A)
@ 2013-11-06  9:04         ` Paolo Bonzini
  0 siblings, 0 replies; 7+ messages in thread
From: Paolo Bonzini @ 2013-11-06  9:04 UTC (permalink / raw)
  To: Zhanghaoyu (A)
  Cc: Huangweidong (C), Gleb Natapov, Michael S. Tsirkin,
	Marcelo Tosatti, Luonengjun, qemu-devel@nongnu.org, Zengjunliang,
	Wangrui (K)

Il 06/11/2013 02:50, Zhanghaoyu (A) ha scritto:
>>>>> Avoid starting a new migration task while the previous one still
>>>> exist.
>>>>
>>>> Can you explain how to reproduce the problem?
>>>>
>>> When network disconnection between source and destination happened, 
>>> the migration thread stuck at below stack,
>>> Then I cancel the migration task, the migration state in qemu will be set to MIG_STATE_CANCELLED, so the migration job in libvirt quits.
>>> Then I perform migration again, at this time, the network reconnected 
>>> successfully, since the TCP timeout retransmission, above stack will not return immediately, so two migration tasks exist at the same time.
>>> And still worse, source qemu will crash, because of accessing the NULL 
>>> pointer in qemu_bh_schedule(s->cleanup_bh); statement in latter migration task, since the "s->cleanup_bh" had been deleted by previous migration task.
>>
>> Thanks for explaining.  CANCELLING looks like a useful addition.
>>
>> Why do you need both CANCELLING and COMPLETING?  The COMPLETED state should be set only after all I/O is done.
>
> There is a period of time from the timing of setting COMPLETED state to that of migration task exits,
> so it's problematic in COMPLETED->CANCELLED transition, but if applying your below proposal, the problem gone.
>
> do {
>     old_state = s->state;
>     if (old_state != MIG_STATE_SETUP && old_state != MIG_STATE_ACTIVE) {
>         break;
>     }
>     migrate_set_state(s, old_state, MIG_STATE_CANCELLED);
> } while (s->state != MIG_STATE_CANCELLED);

Ok.

>> I agree with Eric that the CANCELLING state should not be exposed via QMP.
>> "info migrate" and "query-migrate" can keep showing "active" for maximum backwards compatibility.
>>
>> More comments below.
>>
>>
>>> -    if (s->state != MIG_STATE_COMPLETED) {
>>> +    if (s->state != MIG_STATE_COMPLETING) {
>>>          qemu_savevm_state_cancel();
>>> +        if (s->state == MIG_STATE_CANCELLING) {
>>> +            migrate_set_state(s, MIG_STATE_CANCELLING, MIG_STATE_CANCELLED); 
>>> +        }
>>
>> I think you can remove the "if" and unconditionally call migrate_set_state.
> 
> Do you mean to remove the "if (s->state == MIG_STATE_CANCELLING)" ?
> The s->state probably is MIG_STATE_ERROR here, is it okay to unconditionally call migrate_set_state?

migrate_set_state has atomic_cmpxchg so it has an "implicit" if, but
you're right it's clearer this way.

Paolo

> Thanks,
> Zhang Haoyu
> 
>>
>>> +    }else {
>>> +        migrate_set_state(s, MIG_STATE_COMPLETING, 
>>> + MIG_STATE_COMPLETED);
>>>      }
>>>  
>>>      notifier_list_notify(&migration_state_notifiers, s);  }
>>>  
>>> -static void migrate_set_state(MigrationState *s, int old_state, int 
>>> new_state) -{
>>> -    if (atomic_cmpxchg(&s->state, old_state, new_state) == new_state) {
>>> -        trace_migrate_set_state(new_state);
>>> -    }
>>> -}
>>> -
>>>  void migrate_fd_error(MigrationState *s)  {
>>>      DPRINTF("setting error state\n"); @@ -328,7 +337,7 @@ static void 
>>> migrate_fd_cancel(MigrationState *s)  {
>>>      DPRINTF("cancelling migration\n");
>>>  
>>> -    migrate_set_state(s, s->state, MIG_STATE_CANCELLED);
>>> +    migrate_set_state(s, s->state, MIG_STATE_CANCELLING);
>>
>> Here probably we want something like
>>
>>    do {
>>        old_state = s->state;
>>        if (old_state != MIG_STATE_SETUP && old_state != MIG_STATE_ACTIVE) {
>>            break;
>>        }
>>        migrate_set_state(s, old_state, MIG_STATE_CANCELLING);
>>    } while (s->state != MIG_STATE_CANCELLING);
>>
>> to avoid a bogus COMPLETED->CANCELLED transition.  Please separate the patch in two parts:
>>
>> (1) the first uses the above code, with CANCELLED instead of CANCELLING
>>
>> (2) the second, similar to the one you have posted, introduces the new CANCELLING state
>>
>> Thanks,
>>
>> Paolo

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2013-11-06  9:05 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-11-04 11:26 [Qemu-devel] [PATCH] migration: avoid starting a new migration task while the previous one still exist Zhanghaoyu (A)
2013-11-04 11:30 ` Paolo Bonzini
2013-11-05  2:23   ` Zhanghaoyu (A)
2013-11-05  9:15     ` Paolo Bonzini
2013-11-06  1:50       ` Zhanghaoyu (A)
2013-11-06  9:04         ` Paolo Bonzini
2013-11-04 13:59 ` Eric Blake

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).