qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: "Dr. David Alan Gilbert (git)" <dgilbert@redhat.com>
To: qemu-devel@nongnu.org
Cc: quintela@redhat.com, ashijeetacharya@gmail.com, amit@kernel.org,
	pbutsykin@virtuozzo.com, zhang.zhanghailiang@huawei.com
Subject: [Qemu-devel] [PULL 09/14] COLO: Shutdown related socket fd while do failover
Date: Mon, 13 Feb 2017 17:50:28 +0000	[thread overview]
Message-ID: <20170213175033.7314-10-dgilbert@redhat.com> (raw)
In-Reply-To: <20170213175033.7314-1-dgilbert@redhat.com>

From: zhanghailiang <zhang.zhanghailiang@huawei.com>

If the net connection between primary host and secondary host breaks
while COLO/COLO incoming threads are doing read() or write().
It will block until connection is timeout, and the failover process
will be blocked because of it.

So it is necessary to shutdown all the socket fds used by COLO
to avoid this situation. Besides, we should close the corresponding
file descriptors after failvoer BH shutdown them,
Or there will be an error.

Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <1484657864-21708-3-git-send-email-zhang.zhanghailiang@huawei.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
---
 include/migration/migration.h |  3 +++
 migration/colo.c              | 43 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 46 insertions(+)

diff --git a/include/migration/migration.h b/include/migration/migration.h
index cb83f16..1735d66 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -116,6 +116,7 @@ struct MigrationIncomingState {
     QemuThread colo_incoming_thread;
     /* The coroutine we should enter (back) after failover */
     Coroutine *migration_incoming_co;
+    QemuSemaphore colo_incoming_sem;
 
     /* See savevm.c */
     LoadStateEntry_Head loadvm_handlers;
@@ -187,6 +188,8 @@ struct MigrationState
     QSIMPLEQ_HEAD(src_page_requests, MigrationSrcPageRequest) src_page_requests;
     /* The RAMBlock used in the last src_page_request */
     RAMBlock *last_req_rb;
+    /* The semaphore is used to notify COLO thread that failover is finished */
+    QemuSemaphore colo_exit_sem;
 
     /* The semaphore is used to notify COLO thread to do checkpoint */
     QemuSemaphore colo_checkpoint_sem;
diff --git a/migration/colo.c b/migration/colo.c
index 08b2e46..3222812 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -59,6 +59,18 @@ static void secondary_vm_do_failover(void)
         /* recover runstate to normal migration finish state */
         autostart = true;
     }
+    /*
+     * Make sure COLO incoming thread not block in recv or send,
+     * If mis->from_src_file and mis->to_src_file use the same fd,
+     * The second shutdown() will return -1, we ignore this value,
+     * It is harmless.
+     */
+    if (mis->from_src_file) {
+        qemu_file_shutdown(mis->from_src_file);
+    }
+    if (mis->to_src_file) {
+        qemu_file_shutdown(mis->to_src_file);
+    }
 
     old_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
                                    FAILOVER_STATUS_COMPLETED);
@@ -67,6 +79,8 @@ static void secondary_vm_do_failover(void)
                      "secondary VM", FailoverStatus_lookup[old_state]);
         return;
     }
+    /* Notify COLO incoming thread that failover work is finished */
+    qemu_sem_post(&mis->colo_incoming_sem);
     /* For Secondary VM, jump to incoming co */
     if (mis->migration_incoming_co) {
         qemu_coroutine_enter(mis->migration_incoming_co);
@@ -81,6 +95,18 @@ static void primary_vm_do_failover(void)
     migrate_set_state(&s->state, MIGRATION_STATUS_COLO,
                       MIGRATION_STATUS_COMPLETED);
 
+    /*
+     * Wake up COLO thread which may blocked in recv() or send(),
+     * The s->rp_state.from_dst_file and s->to_dst_file may use the
+     * same fd, but we still shutdown the fd for twice, it is harmless.
+     */
+    if (s->to_dst_file) {
+        qemu_file_shutdown(s->to_dst_file);
+    }
+    if (s->rp_state.from_dst_file) {
+        qemu_file_shutdown(s->rp_state.from_dst_file);
+    }
+
     old_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
                                    FAILOVER_STATUS_COMPLETED);
     if (old_state != FAILOVER_STATUS_ACTIVE) {
@@ -88,6 +114,8 @@ static void primary_vm_do_failover(void)
                      FailoverStatus_lookup[old_state]);
         return;
     }
+    /* Notify COLO thread that failover work is finished */
+    qemu_sem_post(&s->colo_exit_sem);
 }
 
 void colo_do_failover(MigrationState *s)
@@ -361,6 +389,14 @@ out:
 
     timer_del(s->colo_delay_timer);
 
+    /* Hope this not to be too long to wait here */
+    qemu_sem_wait(&s->colo_exit_sem);
+    qemu_sem_destroy(&s->colo_exit_sem);
+    /*
+     * Must be called after failover BH is completed,
+     * Or the failover BH may shutdown the wrong fd that
+     * re-used by other threads after we release here.
+     */
     if (s->rp_state.from_dst_file) {
         qemu_fclose(s->rp_state.from_dst_file);
     }
@@ -385,6 +421,7 @@ void migrate_start_colo_process(MigrationState *s)
     s->colo_delay_timer =  timer_new_ms(QEMU_CLOCK_HOST,
                                 colo_checkpoint_notify, s);
 
+    qemu_sem_init(&s->colo_exit_sem, 0);
     migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
                       MIGRATION_STATUS_COLO);
     colo_process_checkpoint(s);
@@ -423,6 +460,8 @@ void *colo_process_incoming_thread(void *opaque)
     uint64_t value;
     Error *local_err = NULL;
 
+    qemu_sem_init(&mis->colo_incoming_sem, 0);
+
     migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                       MIGRATION_STATUS_COLO);
 
@@ -533,6 +572,10 @@ out:
         qemu_fclose(fb);
     }
 
+    /* Hope this not to be too long to loop here */
+    qemu_sem_wait(&mis->colo_incoming_sem);
+    qemu_sem_destroy(&mis->colo_incoming_sem);
+    /* Must be called after failover BH is completed */
     if (mis->to_src_file) {
         qemu_fclose(mis->to_src_file);
     }
-- 
2.9.3

  parent reply	other threads:[~2017-02-13 17:50 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-02-13 17:50 [Qemu-devel] [PULL 00/14] migration queue Dr. David Alan Gilbert (git)
2017-02-13 17:50 ` [Qemu-devel] [PULL 01/14] migration: remove myself as maintainer Dr. David Alan Gilbert (git)
2017-02-13 17:50 ` [Qemu-devel] [PULL 02/14] MAINTAINERS: update my email address Dr. David Alan Gilbert (git)
2017-02-13 17:50 ` [Qemu-devel] [PULL 03/14] migration: add MigrationState arg for ram_save_/compressed_/page() Dr. David Alan Gilbert (git)
2017-02-13 17:50 ` [Qemu-devel] [PULL 04/14] add 'release-ram' migrate capability Dr. David Alan Gilbert (git)
2017-02-13 17:50 ` [Qemu-devel] [PULL 05/14] migration: discard non-dirty ram pages after the start of postcopy Dr. David Alan Gilbert (git)
2017-02-13 17:50 ` [Qemu-devel] [PULL 06/14] migrate: Introduce zero RAM checks to skip RAM migration Dr. David Alan Gilbert (git)
2017-02-13 17:50 ` [Qemu-devel] [PULL 07/14] migration: consolidate VMStateField.start Dr. David Alan Gilbert (git)
2017-02-13 17:50 ` [Qemu-devel] [PULL 08/14] COLO: fix setting checkpoint-delay not working properly Dr. David Alan Gilbert (git)
2017-02-13 17:50 ` Dr. David Alan Gilbert (git) [this message]
2017-02-13 17:50 ` [Qemu-devel] [PULL 10/14] COLO: Don't process failover request while loading VM's state Dr. David Alan Gilbert (git)
2017-02-13 17:50 ` [Qemu-devel] [PULL 11/14] migration: Add VMSTATE_UNUSED_VARRAY_UINT32 Dr. David Alan Gilbert (git)
2017-02-13 17:50 ` [Qemu-devel] [PULL 12/14] migration: Add VMSTATE_WITH_TMP Dr. David Alan Gilbert (git)
2017-02-13 17:50 ` [Qemu-devel] [PULL 13/14] tests/migration: Add test for VMSTATE_WITH_TMP Dr. David Alan Gilbert (git)
2017-02-13 17:50 ` [Qemu-devel] [PULL 14/14] virtio/migration: Migrate virtio-net to VMState Dr. David Alan Gilbert (git)
2017-02-14  9:52 ` [Qemu-devel] [PULL 00/14] migration queue Peter Maydell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170213175033.7314-10-dgilbert@redhat.com \
    --to=dgilbert@redhat.com \
    --cc=amit@kernel.org \
    --cc=ashijeetacharya@gmail.com \
    --cc=pbutsykin@virtuozzo.com \
    --cc=qemu-devel@nongnu.org \
    --cc=quintela@redhat.com \
    --cc=zhang.zhanghailiang@huawei.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).