From: zhanghailiang <zhang.zhanghailiang@huawei.com>
To: qemu-devel@nongnu.org
Cc: aarcange@redhat.com,
zhanghailiang <zhang.zhanghailiang@huawei.com>,
hanweidong@huawei.com, quintela@redhat.com,
peter.huangpeng@huawei.com, dgilbert@redhat.com,
amit.shah@redhat.com
Subject: [Qemu-devel] [RFC 13/13] snapshot: Remove page's write-protect and copy the content during setup stage
Date: Thu, 7 Jan 2016 20:20:08 +0800 [thread overview]
Message-ID: <1452169208-840-14-git-send-email-zhang.zhanghailiang@huawei.com> (raw)
In-Reply-To: <1452169208-840-1-git-send-email-zhang.zhanghailiang@huawei.com>
If we modify VM's RAM (pages) during setup stage after enable write-protect
notification in snapshot thread, the modification action will get stuck because
we only remove the page's write-protect in savevm process, it blocked by itself.
To fix this bug, we will remove page's write-protect in fault thread during
the setup stage. Besides, we should not try to get global lock after setup stage,
or there maybe an deadlock error.
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
---
include/migration/migration.h | 4 ++--
migration/migration.c | 2 +-
migration/postcopy-ram.c | 17 ++++++++++++++++-
migration/ram.c | 37 +++++++++++++++++++++++++++++++------
4 files changed, 50 insertions(+), 10 deletions(-)
diff --git a/include/migration/migration.h b/include/migration/migration.h
index ef4c071..435de31 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -127,7 +127,7 @@ struct MigrationSrcPageRequest {
RAMBlock *rb;
hwaddr offset;
hwaddr len;
-
+ uint8_t *pages_copy_addr;
QSIMPLEQ_ENTRY(MigrationSrcPageRequest) next_req;
};
@@ -333,7 +333,7 @@ void global_state_store_running(void);
void flush_page_queue(MigrationState *ms);
int ram_save_queue_pages(MigrationState *ms, const char *rbname,
- ram_addr_t start, ram_addr_t len);
+ ram_addr_t start, ram_addr_t len, bool copy_pages);
PostcopyState postcopy_state_get(void);
/* Set the state and return the old state */
diff --git a/migration/migration.c b/migration/migration.c
index 3765c3b..bf4c7a1 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1248,7 +1248,7 @@ static void migrate_handle_rp_req_pages(MigrationState *ms, const char* rbname,
return;
}
- if (ram_save_queue_pages(ms, rbname, start, len)) {
+ if (ram_save_queue_pages(ms, rbname, start, len, false)) {
mark_source_rp_bad(ms);
}
}
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 61392d3..2cf477d 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -543,13 +543,28 @@ static void *postcopy_ram_fault_thread(void *opaque)
MigrationState *ms = container_of(us, MigrationState,
userfault_state);
ret = ram_save_queue_pages(ms, qemu_ram_get_idstr(rb), rb_offset,
- hostpagesize);
+ hostpagesize, true);
if (ret < 0) {
error_report("%s: Save: %"PRIx64 " failed!",
__func__, (uint64_t)msg.arg.pagefault.address);
break;
}
+
+ /* Note: In the setup process, snapshot_thread may modify VM's
+ * write-protected pages, we should not block it there, or there
+ * will be an deadlock error.
+ */
+ if (migration_in_setup(ms)) {
+ uint64_t host = msg.arg.pagefault.address;
+
+ host &= ~(hostpagesize - 1);
+ ret = ram_set_pages_wp(host, getpagesize(), true,
+ us->userfault_fd);
+ if (ret < 0) {
+ error_report("Remove page's write-protect failed");
+ }
+ }
}
}
trace_postcopy_ram_fault_thread_exit();
diff --git a/migration/ram.c b/migration/ram.c
index 8656719..747f9aa 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -233,6 +233,7 @@ struct PageSearchStatus {
ram_addr_t offset;
/* Set once we wrap around */
bool complete_round;
+ uint8_t *pages_copy;
};
typedef struct PageSearchStatus PageSearchStatus;
@@ -742,7 +743,12 @@ static int ram_save_page(QEMUFile *f, PageSearchStatus *pss,
RAMBlock *block = pss->block;
ram_addr_t offset = pss->offset;
- p = block->host + offset;
+ /* If we have a copy of this page, use the backup page first */
+ if (pss->pages_copy) {
+ p = pss->pages_copy;
+ } else {
+ p = block->host + offset;
+ }
/* In doubt sent page as normal */
bytes_xmit = 0;
@@ -926,7 +932,12 @@ static int ram_save_compressed_page(QEMUFile *f, PageSearchStatus *pss,
RAMBlock *block = pss->block;
ram_addr_t offset = pss->offset;
- p = block->host + offset;
+ /* If we have a copy of this page, use the backup first */
+ if (pss->pages_copy) {
+ p = pss->pages_copy;
+ } else {
+ p = block->host + offset;
+ }
bytes_xmit = 0;
ret = ram_control_save_page(f, block->offset,
@@ -1043,7 +1054,7 @@ static bool find_dirty_block(QEMUFile *f, PageSearchStatus *pss,
* Returns: block (or NULL if none available)
*/
static RAMBlock *unqueue_page(MigrationState *ms, ram_addr_t *offset,
- ram_addr_t *ram_addr_abs)
+ ram_addr_t *ram_addr_abs, uint8 **pages_copy_addr)
{
RAMBlock *block = NULL;
@@ -1055,7 +1066,7 @@ static RAMBlock *unqueue_page(MigrationState *ms, ram_addr_t *offset,
*offset = entry->offset;
*ram_addr_abs = (entry->offset + entry->rb->offset) &
TARGET_PAGE_MASK;
-
+ *pages_copy_addr = entry->pages_copy_addr;
if (entry->len > TARGET_PAGE_SIZE) {
entry->len -= TARGET_PAGE_SIZE;
entry->offset += TARGET_PAGE_SIZE;
@@ -1086,9 +1097,10 @@ static bool get_queued_page(MigrationState *ms, PageSearchStatus *pss,
RAMBlock *block;
ram_addr_t offset;
bool dirty;
+ uint8 *pages_backup_addr = NULL;
do {
- block = unqueue_page(ms, &offset, ram_addr_abs);
+ block = unqueue_page(ms, &offset, ram_addr_abs, &pages_backup_addr);
/*
* We're sending this page, and since it's postcopy nothing else
* will dirty it, and we must make sure it doesn't get sent again
@@ -1130,6 +1142,7 @@ static bool get_queued_page(MigrationState *ms, PageSearchStatus *pss,
*/
pss->block = block;
pss->offset = offset;
+ pss->pages_copy = pages_backup_addr;
}
return !!block;
@@ -1166,7 +1179,7 @@ void flush_page_queue(MigrationState *ms)
* Return: 0 on success
*/
int ram_save_queue_pages(MigrationState *ms, const char *rbname,
- ram_addr_t start, ram_addr_t len)
+ ram_addr_t start, ram_addr_t len, bool copy_pages)
{
RAMBlock *ramblock;
@@ -1206,6 +1219,17 @@ int ram_save_queue_pages(MigrationState *ms, const char *rbname,
new_entry->rb = ramblock;
new_entry->offset = start;
new_entry->len = len;
+ if (copy_pages) {
+ /* Fix me: Better to realize a memory pool */
+ new_entry->pages_copy_addr = g_try_malloc0(len);
+
+ if (!new_entry->pages_copy_addr) {
+ error_report("%s: Failed to alloc memory", __func__);
+ return -1;
+ }
+
+ memcpy(new_entry->pages_copy_addr, ramblock_ptr(ramblock, start), len);
+ }
memory_region_ref(ramblock->mr);
qemu_mutex_lock(&ms->src_page_req_mutex);
@@ -1342,6 +1366,7 @@ static int ram_find_and_save_block(QEMUFile *f, bool last_stage,
pss.block = last_seen_block;
pss.offset = last_offset;
pss.complete_round = false;
+ pss.pages_copy = NULL;
if (!pss.block) {
pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
--
1.8.3.1
next prev parent reply other threads:[~2016-01-07 12:20 UTC|newest]
Thread overview: 48+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-01-07 12:19 [Qemu-devel] [RFC 00/13] Live memory snapshot based on userfaultfd zhanghailiang
2016-01-07 12:19 ` [Qemu-devel] [RFC 01/13] postcopy/migration: Split fault related state into struct UserfaultState zhanghailiang
2016-01-07 12:19 ` [Qemu-devel] [RFC 02/13] migration: Allow the migrate command to work on file: urls zhanghailiang
2016-07-13 16:12 ` Dr. David Alan Gilbert
2016-07-14 5:27 ` Hailiang Zhang
2016-01-07 12:19 ` [Qemu-devel] [RFC 03/13] migration: Allow -incoming " zhanghailiang
2016-01-11 20:02 ` Dr. David Alan Gilbert
2016-01-12 13:04 ` Hailiang Zhang
2016-01-07 12:19 ` [Qemu-devel] [RFC 04/13] migration: Create a snapshot thread to realize saving memory snapshot zhanghailiang
2016-01-07 12:20 ` [Qemu-devel] [RFC 05/13] migration: implement initialization work for snapshot zhanghailiang
2016-01-07 12:20 ` [Qemu-devel] [RFC 06/13] QEMUSizedBuffer: Introduce two help functions for qsb zhanghailiang
2016-01-07 12:20 ` [Qemu-devel] [RFC 07/13] savevm: Split qemu_savevm_state_complete_precopy() into two helper functions zhanghailiang
2016-01-07 12:20 ` [Qemu-devel] [RFC 08/13] snapshot: Save VM's device state into snapshot file zhanghailiang
2016-01-07 12:20 ` [Qemu-devel] [RFC 09/13] migration/postcopy-ram: fix some helper functions to support userfaultfd write-protect zhanghailiang
2016-01-07 12:20 ` [Qemu-devel] [RFC 10/13] snapshot: Enable the write-protect notification capability for VM's RAM zhanghailiang
2016-01-07 12:20 ` [Qemu-devel] [RFC 11/13] snapshot/migration: Save VM's RAM into snapshot file zhanghailiang
2016-01-07 12:20 ` [Qemu-devel] [RFC 12/13] migration/ram: Fix some helper functions' parameter to use PageSearchStatus zhanghailiang
2016-01-11 17:55 ` Dr. David Alan Gilbert
2016-01-12 12:59 ` Hailiang Zhang
2016-01-07 12:20 ` zhanghailiang [this message]
2016-07-13 17:52 ` [Qemu-devel] [RFC 13/13] snapshot: Remove page's write-protect and copy the content during setup stage Dr. David Alan Gilbert
2016-07-14 8:02 ` Hailiang Zhang
2016-07-04 12:22 ` [Qemu-devel] [RFC 00/13] Live memory snapshot based on userfaultfd Baptiste Reynal
2016-07-05 1:49 ` Hailiang Zhang
2016-07-05 9:57 ` Baptiste Reynal
2016-07-05 10:27 ` Hailiang Zhang
2016-08-18 15:56 ` Andrea Arcangeli
2016-08-20 6:31 ` Hailiang Zhang
2017-02-27 15:37 ` Christian Pinto
2017-02-28 1:48 ` Hailiang Zhang
2017-02-28 8:30 ` Christian Pinto
2017-02-28 16:14 ` Andrea Arcangeli
2017-03-01 1:08 ` Hailiang Zhang
2017-03-09 11:34 ` [Qemu-devel] [RFC PATCH 0/4] ARM/ARM64 fixes for live " Christian Pinto
2017-03-09 11:34 ` [Qemu-devel] [RFC PATCH 1/4] migration/postcopy-ram: check pagefault flags in userfaultfd thread Christian Pinto
2017-03-09 11:34 ` [Qemu-devel] [RFC PATCH 2/4] migration/ram: Fix for ARM/ARM64 page size Christian Pinto
2017-03-09 11:34 ` [Qemu-devel] [RFC PATCH 3/4] migration: snapshot thread Christian Pinto
2017-03-09 11:34 ` [Qemu-devel] [RFC PATCH 4/4] migration/postcopy-ram: ram_set_pages_wp fix Christian Pinto
2017-03-09 17:46 ` [Qemu-devel] [RFC PATCH 0/4] ARM/ARM64 fixes for live memory snapshot based on userfaultfd Dr. David Alan Gilbert
2017-03-10 8:15 ` Christian Pinto
2016-09-06 3:39 ` [Qemu-devel] [RFC 00/13] Live " Hailiang Zhang
2016-09-18 2:14 ` Hailiang Zhang
2016-12-08 12:45 ` Hailiang Zhang
2016-07-05 14:59 ` Andrea Arcangeli
2016-07-13 18:02 ` Dr. David Alan Gilbert
2016-07-14 10:24 ` Hailiang Zhang
2016-07-14 11:43 ` Dr. David Alan Gilbert
2016-07-19 6:53 ` Hailiang Zhang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1452169208-840-14-git-send-email-zhang.zhanghailiang@huawei.com \
--to=zhang.zhanghailiang@huawei.com \
--cc=aarcange@redhat.com \
--cc=amit.shah@redhat.com \
--cc=dgilbert@redhat.com \
--cc=hanweidong@huawei.com \
--cc=peter.huangpeng@huawei.com \
--cc=qemu-devel@nongnu.org \
--cc=quintela@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).