* [Qemu-devel] [PATCH 1/2] Migration: New bitmap for postcopy migration failure
2016-08-16 0:25 [Qemu-devel] [PATCH 0/2] Bitmap for recovery of postcopy migration from n/w failure Md Haris Iqbal
@ 2016-08-16 0:25 ` Md Haris Iqbal
2016-08-16 0:25 ` [Qemu-devel] [PATCH 2/2] Migration: Request lost pages (due to n/w failure) from source Md Haris Iqbal
1 sibling, 0 replies; 4+ messages in thread
From: Md Haris Iqbal @ 2016-08-16 0:25 UTC (permalink / raw)
To: qemu-devel; +Cc: dgilbert, Md Haris Iqbal
Signed-off-by: Md Haris Iqbal <haris.phnx@gmail.com>
---
include/migration/migration.h | 3 +++
migration/migration.c | 8 ++++--
migration/postcopy-ram.c | 2 +-
migration/ram.c | 63 ++++++++++++++++++++++++++++++++++++++++++-
4 files changed, 72 insertions(+), 4 deletions(-)
diff --git a/include/migration/migration.h b/include/migration/migration.h
index 74d456e..0a42b87 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -358,6 +358,9 @@ int ram_save_queue_pages(MigrationState *ms, const char *rbname,
int qemu_migrate_postcopy_outgoing_recovery(MigrationState *ms);
int qemu_migrate_postcopy_incoming_recovery(QEMUFile **f,MigrationIncomingState* mis);
+void migrate_incoming_ram_bitmap_init(void);
+void migrate_incoming_ram_bitmap_update(RAMBlock *rb, ram_addr_t addr);
+
PostcopyState postcopy_state_get(void);
/* Set the state and return the old state */
PostcopyState postcopy_state_set(PostcopyState new_state);
diff --git a/migration/migration.c b/migration/migration.c
index 4edd77c..99138dd 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -381,6 +381,10 @@ static void process_incoming_migration_co(void *opaque)
postcopy_state_set(POSTCOPY_INCOMING_NONE);
migrate_set_state(&mis->state, MIGRATION_STATUS_NONE,
MIGRATION_STATUS_ACTIVE);
+
+ /* Initializing the bitmap for destination side */
+ migrate_incoming_ram_bitmap_init();
+
ret = qemu_loadvm_state(f);
ps = postcopy_state_get();
diff --git a/migration/ram.c b/migration/ram.c
index 815bc0e..4f16243 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -250,6 +250,13 @@ static struct BitmapRcu {
* of the postcopy phase
*/
unsigned long *unsentmap;
+ /*
+ * A new bitmap for postcopy network failure recovery.
+ * It keeps track of the pages received.
+ * In the end, it would be used to request pages that were
+ * lost due to network failure.
+ */
+ unsigned long *not_received;
} *migration_bitmap_rcu;
struct CompressParam {
@@ -2340,6 +2347,7 @@ static int ram_load_postcopy(QEMUFile *f)
void *page_buffer = NULL;
void *place_source = NULL;
uint8_t ch;
+ RAMBlock* block = NULL;
addr = qemu_get_be64(f);
flags = addr & ~TARGET_PAGE_MASK;
@@ -2348,7 +2356,7 @@ static int ram_load_postcopy(QEMUFile *f)
trace_ram_load_postcopy_loop((uint64_t)addr, flags);
place_needed = false;
if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE)) {
- RAMBlock *block = ram_block_from_stream(f, flags);
+ block = ram_block_from_stream(f, flags);
host = host_from_ram_block_offset(block, addr);
if (!host) {
@@ -2436,6 +2444,15 @@ static int ram_load_postcopy(QEMUFile *f)
if (!ret) {
ret = qemu_file_get_error(f);
}
+ if (block != NULL) {
+ /*
+ * TODO
+ * We need to delay updating the bits until host page is
+ * received and the place is done, or tidy up the bitmap later
+ * accordingly (whether whole host page was received or not)
+ */
+ migrate_incoming_ram_bitmap_update(block, addr);
+ }
}
return ret;
@@ -2483,6 +2500,16 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
RAMBlock *block = ram_block_from_stream(f, flags);
host = host_from_ram_block_offset(block, addr);
+
+ migrate_incoming_ram_bitmap_update(block, addr);
+ /*
+ * TODO
+ * 1) Do we need a bitmap_update call later in the while loop also?
+ * 2) We need to delay updating the bits until host page is
+ * received and the place is done, or tidy up the bitmap later
+ * accordingly (whether whole host page was received or not)
+ */
+
if (!host) {
error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
ret = -EINVAL;
@@ -2578,6 +2605,40 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
return ret;
}
+/*
+ * Allocate the destination-side bitmap used for postcopy network
+ * failure recovery.  It holds one bit per target page up to
+ * last_ram_offset(); every bit starts set and is cleared by
+ * migrate_incoming_ram_bitmap_update() as pages are received.  The bits
+ * still set at the end are meant to name the pages to request again.
+ */
+void migrate_incoming_ram_bitmap_init(void)
+{
+    /* Bitmap length in target pages, including gaps */
+    int64_t nr_pages = last_ram_offset() >> TARGET_PAGE_BITS;
+
+    migration_bitmap_rcu = g_new0(struct BitmapRcu, 1);
+    migration_bitmap_rcu->not_received = bitmap_new(nr_pages);
+    bitmap_set(migration_bitmap_rcu->not_received, 0, nr_pages);
+}
+
+/*
+ * Mark the target page at offset @addr in @rb as received by clearing
+ * its bit in the not_received bitmap.  ram_load() calls this before it
+ * has checked the block/offset read from the stream, so a NULL @rb or an
+ * @addr outside rb->used_length is ignored rather than dereferenced.
+ */
+void migrate_incoming_ram_bitmap_update(RAMBlock *rb, ram_addr_t addr)
+{
+    unsigned long nr;
+
+    if (!rb || addr >= rb->used_length) {
+        return;
+    }
+    nr = (rb->offset >> TARGET_PAGE_BITS) + (addr >> TARGET_PAGE_BITS);
+    clear_bit(nr, atomic_rcu_read(&migration_bitmap_rcu)->not_received);
+}
+
static SaveVMHandlers savevm_ram_handlers = {
.save_live_setup = ram_save_setup,
.save_live_iterate = ram_save_iterate,
--
2.7.4
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [Qemu-devel] [PATCH 2/2] Migration: Request lost pages (due to n/w failure) from source
2016-08-16 0:25 [Qemu-devel] [PATCH 0/2] Bitmap for recovery of postcopy migration from n/w failure Md Haris Iqbal
2016-08-16 0:25 ` [Qemu-devel] [PATCH 1/2] Migration: New bitmap for postcopy migration failure Md Haris Iqbal
@ 2016-08-16 0:25 ` Md Haris Iqbal
2016-08-16 15:31 ` Dr. David Alan Gilbert
1 sibling, 1 reply; 4+ messages in thread
From: Md Haris Iqbal @ 2016-08-16 0:25 UTC (permalink / raw)
To: qemu-devel; +Cc: dgilbert, Md Haris Iqbal
Signed-off-by: Md Haris Iqbal <haris.phnx@gmail.com>
---
include/migration/migration.h | 7 +++++++
migration/migration.c | 2 ++
migration/ram.c | 35 +++++++++++++++++++++++++++++++++++
migration/savevm.c | 19 +++++++++++++++++++
4 files changed, 63 insertions(+)
diff --git a/include/migration/migration.h b/include/migration/migration.h
index 0a42b87..4c787ce 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -36,6 +36,7 @@
#define QEMU_VM_CONFIGURATION 0x07
#define QEMU_VM_COMMAND 0x08
#define QEMU_VM_SECTION_FOOTER 0x7e
+#define QEMU_VM_ALMOST_COMPLETE 0x09
struct MigrationParams {
bool blk;
@@ -145,6 +146,11 @@ struct MigrationState
int state;
/* Old style params from 'migrate' command */
MigrationParams params;
+ /*
+ * TODO: two separate flags are not needed for recovery.
+ * Clean this up by using a single variable with distinct states.
+ */
+ bool recovered_once;
bool in_recovery;
/* State related to return path */
@@ -360,6 +366,7 @@ int qemu_migrate_postcopy_incoming_recovery(QEMUFile **f,MigrationIncomingState*
void migrate_incoming_ram_bitmap_init(void);
void migrate_incoming_ram_bitmap_update(RAMBlock *rb, ram_addr_t addr);
+void *migrate_incoming_ram_req_pages(void *opaque);
PostcopyState postcopy_state_get(void);
/* Set the state and return the old state */
diff --git a/migration/migration.c b/migration/migration.c
index 99138dd..be24b69 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1042,6 +1042,7 @@ MigrationState *migrate_init(const MigrationParams *params)
s->xfer_limit = 0;
s->cleanup_bh = 0;
s->to_dst_file = NULL;
+ s->recovered_once = false;
s->in_recovery = false;
s->state = MIGRATION_STATUS_NONE;
s->params = *params;
@@ -1918,6 +1919,7 @@ static void *migration_thread(void *opaque)
if(ret == 0) {
current_active_state = MIGRATION_STATUS_POSTCOPY_ACTIVE;
runstate_set(RUN_STATE_FINISH_MIGRATE);
+ s->recovered_once = true;
qemu_file_clear_error(s->to_dst_file);
continue;
}
diff --git a/migration/ram.c b/migration/ram.c
index 4f16243..445b863 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -2639,6 +2639,41 @@ void migrate_incoming_ram_bitmap_update(RAMBlock *rb, ram_addr_t addr)
}
}
+/*
+ * Thread body: ask the source to resend every page whose bit is still
+ * set in the not_received bitmap.  @opaque is the
+ * MigrationIncomingState the requests are sent through; returns NULL.
+ *
+ * The scan always resumes at the bit after the page just requested.
+ * Stepping the byte offset by one instead maps back to the same page, so
+ * a page that is still missing would be requested in an endless loop.
+ *
+ * NOTE(review): RCU accessors are used with no rcu_read_lock() visible
+ * in this function - confirm that is safe for this thread.
+ */
+void *migrate_incoming_ram_req_pages(void *opaque)
+{
+    MigrationIncomingState *mis = opaque;
+    struct RAMBlock *rb;
+    size_t hostpagesize = getpagesize();
+    uint64_t addr;
+    unsigned long first, end, nr;
+    unsigned long *not_received;
+
+    not_received = atomic_rcu_read(&migration_bitmap_rcu)->not_received;
+    QLIST_FOREACH_RCU(rb, &ram_list.blocks, next) {
+        first = rb->offset >> TARGET_PAGE_BITS;
+        end = first + (rb->used_length >> TARGET_PAGE_BITS);
+        for (nr = find_next_bit(not_received, end, first); nr < end;
+             nr = find_next_bit(not_received, end, nr + 1)) {
+            addr = (uint64_t)(nr - first) << TARGET_PAGE_BITS;
+            migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb),
+                                      addr, hostpagesize);
+        }
+    }
+    return NULL;
+}
+
static SaveVMHandlers savevm_ram_handlers = {
.save_live_setup = ram_save_setup,
.save_live_iterate = ram_save_iterate,
diff --git a/migration/savevm.c b/migration/savevm.c
index 5fa39c1..103f0b8 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -986,6 +986,12 @@ void qemu_savevm_state_complete_postcopy(QEMUFile *f)
{
SaveStateEntry *se;
int ret;
+ MigrationState* ms = migrate_get_current();
+
+ if (ms->recovered_once == true) {
+ qemu_put_byte(f, QEMU_VM_ALMOST_COMPLETE);
+ qemu_fflush(f);
+ }
QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
if (!se->ops || !se->ops->save_live_complete_postcopy) {
@@ -1830,6 +1836,7 @@ static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis)
uint8_t section_type;
int ret;
PostcopyState ps;
+ QemuThread req_pages_not_received;
while ((section_type = qemu_get_byte(f)) != QEMU_VM_EOF) {
@@ -1851,6 +1858,18 @@ static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis)
return ret;
}
break;
+ case QEMU_VM_ALMOST_COMPLETE:
+ /*
+ * This case will only be used when migration recovers from a
+ * network failure during a postcopy migration.
+ * Now, send the requests for pages that were lost due to the
+ * network failure.
+ */
+ qemu_thread_create(&req_pages_not_received,
+ "postcopy/req_pages_not_received",
+ migrate_incoming_ram_req_pages, mis,
+ QEMU_THREAD_DETACHED);
+ break;
default:
error_report("Unknown savevm section type %d", section_type);
return -EINVAL;
--
2.7.4
^ permalink raw reply related [flat|nested] 4+ messages in thread