From: mrhines@linux.vnet.ibm.com
To: qemu-devel@nongnu.org
Cc: GILR@il.ibm.com, SADEKJ@il.ibm.com, pbonzini@redhat.com,
quintela@redhat.com, EREZH@il.ibm.com, owasserm@redhat.com,
junqing.wang@cs2c.com.cn, onom@us.ibm.com, abali@us.ibm.com,
isaku.yamahata@gmail.com, gokul@us.ibm.com, dbulkow@gmail.com,
hinesmr@cn.ibm.com, BIRAN@il.ibm.com, lig.fnst@cn.fujitsu.com,
"Michael R. Hines" <mrhines@us.ibm.com>
Subject: [Qemu-devel] [RFC PATCH v2 04/12] mc: support custom page loading and copying
Date: Tue, 18 Feb 2014 16:50:21 +0800 [thread overview]
Message-ID: <1392713429-18201-5-git-send-email-mrhines@linux.vnet.ibm.com> (raw)
In-Reply-To: <1392713429-18201-1-git-send-email-mrhines@linux.vnet.ibm.com>
From: "Michael R. Hines" <mrhines@us.ibm.com>
Just as RDMA has custom routines for saving memory,
this provides RDMA with custom routines for loading
and copying memory as well.
Micro-checkpointing needs this support in order to modify
arch_init.c as little as possible while still being
able to load RDMA-based memory from checkpoints in a
performance-optimal way as they are received from the network.
Signed-off-by: Michael R. Hines <mrhines@us.ibm.com>
---
arch_init.c | 9 +++--
include/migration/migration.h | 33 ++++++++++++++++--
include/migration/qemu-file.h | 54 +++++++++++++++++++++++++++--
qemu-file.c | 80 +++++++++++++++++++++++++++++++++++++++++--
4 files changed, 167 insertions(+), 9 deletions(-)
diff --git a/arch_init.c b/arch_init.c
index b8364b0..db75120 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -540,7 +540,7 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
/* In doubt sent page as normal */
bytes_sent = -1;
ret = ram_control_save_page(f, block->offset,
- offset, TARGET_PAGE_SIZE, &bytes_sent);
+ block->host, offset, TARGET_PAGE_SIZE, &bytes_sent);
if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
if (ret != RAM_SAVE_CONTROL_DELAYED) {
@@ -1004,13 +1004,18 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
} else if (flags & RAM_SAVE_FLAG_PAGE) {
void *host;
+ int r;
host = host_from_stream_offset(f, addr, flags);
if (!host) {
return -EINVAL;
}
- qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
+ r = ram_control_load_page(f, host, TARGET_PAGE_SIZE);
+
+ if (r == RAM_LOAD_CONTROL_NOT_SUPP) {
+ qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
+ }
} else if (flags & RAM_SAVE_FLAG_XBZRLE) {
void *host = host_from_stream_offset(f, addr, flags);
if (!host) {
diff --git a/include/migration/migration.h b/include/migration/migration.h
index 9c62e2f..5c1a574 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -190,9 +190,38 @@ void ram_control_load_hook(QEMUFile *f, uint64_t flags);
#define RAM_SAVE_CONTROL_NOT_SUPP -1000
#define RAM_SAVE_CONTROL_DELAYED -2000
+#define RAM_LOAD_CONTROL_NOT_SUPP -3000
+#define RAM_LOAD_CONTROL_DELAYED -4000
+#define RAM_COPY_CONTROL_NOT_SUPP -5000
+#define RAM_COPY_CONTROL_DELAYED -6000
-size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
- ram_addr_t offset, size_t size,
+#define RDMA_CONTROL_VERSION_CURRENT 1
+
+int ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
+ uint8_t *host_addr,
+ ram_addr_t offset, long size,
int *bytes_sent);
+int ram_control_load_page(QEMUFile *f,
+ void *host_addr,
+ long size);
+
+int ram_control_copy_page(QEMUFile *f,
+ ram_addr_t block_offset_dest,
+ ram_addr_t offset_dest,
+ ram_addr_t block_offset_source,
+ ram_addr_t offset_source,
+ long size);
+
+int migrate_use_mc(void);
+int migrate_use_mc_net(void);
+int migrate_use_mc_rdma_copy(void);
+
+#define MC_VERSION 1
+
+int mc_info_load(QEMUFile *f, void *opaque, int version_id);
+void mc_info_save(QEMUFile *f, void *opaque);
+
+void qemu_rdma_info_save(QEMUFile *f, void *opaque);
+int qemu_rdma_info_load(QEMUFile *f, void *opaque, int version_id);
#endif
diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h
index a191fb6..c50de0d 100644
--- a/include/migration/qemu-file.h
+++ b/include/migration/qemu-file.h
@@ -71,17 +71,63 @@ typedef int (QEMURamHookFunc)(QEMUFile *f, void *opaque, uint64_t flags);
#define RAM_CONTROL_ROUND 1
#define RAM_CONTROL_HOOK 2
#define RAM_CONTROL_FINISH 3
+#define RAM_CONTROL_FLUSH 4
/*
* This function allows override of where the RAM page
* is saved (such as RDMA, for example.)
*/
-typedef size_t (QEMURamSaveFunc)(QEMUFile *f, void *opaque,
+typedef int (QEMURamSaveFunc)(QEMUFile *f, void *opaque,
ram_addr_t block_offset,
+ uint8_t *host_addr,
ram_addr_t offset,
- size_t size,
+ long size,
int *bytes_sent);
+/*
+ * This function allows override of where the RAM page
+ * is loaded from (such as RDMA, for example.)
+ */
+typedef int (QEMURamLoadFunc)(QEMUFile *f,
+ void *opaque,
+ void *host_addr,
+ long size);
+
+/*
+ * This function allows *local* RDMA copying of memory between two registered
+ * RAMBlocks, both real ones as well as private memory areas independently
+ * registered by external callers (such as MC). If RDMA is not available,
+ * then this function does nothing and the caller should just use memcpy().
+ */
+typedef int (QEMURamCopyFunc)(QEMUFile *f, void *opaque,
+ ram_addr_t block_offset_dest,
+ ram_addr_t offset_dest,
+ ram_addr_t block_offset_source,
+ ram_addr_t offset_source,
+ long size);
+
+/*
+ * Inform the underlying transport of a new virtual memory area.
+ * If this area is an actual RAMBlock, then pass the corresponding
+ * parameters of that block.
+ * If this area is an arbitrary virtual memory address, then
+ * pass the same value for both @host_addr and @block_offset.
+ */
+typedef int (QEMURamAddFunc)(QEMUFile *f, void *opaque,
+ void *host_addr,
+ ram_addr_t block_offset,
+ uint64_t length);
+
+/*
+ * Tell the underlying transport to remove a virtual memory area.
+ * If this area is an actual RAMBlock, then pass the corresponding
+ * parameters of that block.
+ * If this area is an arbitrary virtual memory address, then
+ * pass the same value for both @host_addr and @block_offset.
+ */
+typedef int (QEMURamRemoveFunc)(QEMUFile *f, void *opaque,
+ ram_addr_t block_offset);
+
typedef struct QEMUFileOps {
QEMUFilePutBufferFunc *put_buffer;
QEMUFileGetBufferFunc *get_buffer;
@@ -92,6 +138,10 @@ typedef struct QEMUFileOps {
QEMURamHookFunc *after_ram_iterate;
QEMURamHookFunc *hook_ram_load;
QEMURamSaveFunc *save_page;
+ QEMURamLoadFunc *load_page;
+ QEMURamCopyFunc *copy_page;
+ QEMURamAddFunc *add;
+ QEMURamRemoveFunc *remove;
} QEMUFileOps;
QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops);
diff --git a/qemu-file.c b/qemu-file.c
index 9473b67..3d7428f 100644
--- a/qemu-file.c
+++ b/qemu-file.c
@@ -501,14 +501,17 @@ void ram_control_load_hook(QEMUFile *f, uint64_t flags)
}
}
-size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
- ram_addr_t offset, size_t size, int *bytes_sent)
+int ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
+ uint8_t *host_addr,
+ ram_addr_t offset, long size, int *bytes_sent)
{
if (f->ops->save_page) {
int ret = f->ops->save_page(f, f->opaque, block_offset,
+ host_addr,
offset, size, bytes_sent);
- if (ret != RAM_SAVE_CONTROL_DELAYED) {
+ if (ret != RAM_SAVE_CONTROL_DELAYED
+ && ret != RAM_SAVE_CONTROL_NOT_SUPP) {
if (bytes_sent && *bytes_sent > 0) {
qemu_update_position(f, *bytes_sent);
} else if (ret < 0) {
@@ -522,6 +525,77 @@ size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
return RAM_SAVE_CONTROL_NOT_SUPP;
}
+int ram_control_load_page(QEMUFile *f, void *host_addr, long size)
+{
+ if (f->ops->load_page) {
+ int ret = f->ops->load_page(f, f->opaque, host_addr, size);
+
+ if (ret != RAM_LOAD_CONTROL_DELAYED
+ && ret != RAM_LOAD_CONTROL_NOT_SUPP) {
+ if (ret < 0) {
+ qemu_file_set_error(f, ret);
+ }
+ }
+
+ return ret;
+ }
+
+ return RAM_LOAD_CONTROL_NOT_SUPP;
+}
+
+int ram_control_copy_page(QEMUFile *f,
+ ram_addr_t block_offset_dest,
+ ram_addr_t offset_dest,
+ ram_addr_t block_offset_source,
+ ram_addr_t offset_source,
+ long size)
+{
+ if (f->ops->copy_page) {
+ int ret = f->ops->copy_page(f, f->opaque,
+ block_offset_dest,
+ offset_dest,
+ block_offset_source,
+ offset_source,
+ size);
+
+ if (ret != RAM_COPY_CONTROL_DELAYED) {
+ if (ret < 0) {
+ qemu_file_set_error(f, ret);
+ }
+ }
+
+ return ret;
+ }
+
+ return RAM_COPY_CONTROL_NOT_SUPP;
+}
+
+
+void ram_control_add(QEMUFile *f, void *host_addr,
+ ram_addr_t block_offset, uint64_t length)
+{
+ int ret = 0;
+
+ if (f->ops->add) {
+ ret = f->ops->add(f, f->opaque, host_addr, block_offset, length);
+ if (ret < 0) {
+ qemu_file_set_error(f, ret);
+ }
+ }
+}
+
+void ram_control_remove(QEMUFile *f, ram_addr_t block_offset)
+{
+ int ret = 0;
+
+ if (f->ops->remove) {
+ ret = f->ops->remove(f, f->opaque, block_offset);
+ if (ret < 0) {
+ qemu_file_set_error(f, ret);
+ }
+ }
+}
+
static void qemu_fill_buffer(QEMUFile *f)
{
int len;
--
1.8.1.2
next prev parent reply other threads:[~2014-02-18 8:51 UTC|newest]
Thread overview: 68+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-02-18 8:50 [Qemu-devel] [RFC PATCH v2 00/12] mc: fault tolerante through micro-checkpointing mrhines
2014-02-18 8:50 ` [Qemu-devel] [RFC PATCH v2 01/12] mc: add documentation for micro-checkpointing mrhines
2014-02-18 12:45 ` Dr. David Alan Gilbert
2014-02-19 1:40 ` Michael R. Hines
2014-02-19 11:27 ` Dr. David Alan Gilbert
2014-02-20 1:17 ` Michael R. Hines
2014-02-20 10:09 ` Dr. David Alan Gilbert
2014-02-20 11:14 ` Li Guang
2014-02-20 14:58 ` Michael R. Hines
2014-02-20 14:57 ` Michael R. Hines
2014-02-20 16:32 ` Dr. David Alan Gilbert
2014-02-21 4:54 ` Michael R. Hines
2014-02-21 9:44 ` Dr. David Alan Gilbert
2014-03-03 6:08 ` Michael R. Hines
2014-02-18 8:50 ` [Qemu-devel] [RFC PATCH v2 02/12] mc: timestamp migration_bitmap and KVM logdirty usage mrhines
2014-02-18 10:32 ` Dr. David Alan Gilbert
2014-02-19 1:42 ` Michael R. Hines
2014-03-11 21:31 ` Juan Quintela
2014-04-04 3:08 ` Michael R. Hines
2014-02-18 8:50 ` [Qemu-devel] [RFC PATCH v2 03/12] mc: introduce a 'checkpointing' status check into the VCPU states mrhines
2014-03-11 21:36 ` Juan Quintela
2014-04-04 3:11 ` Michael R. Hines
2014-03-11 21:40 ` Eric Blake
2014-04-04 3:12 ` Michael R. Hines
2014-02-18 8:50 ` mrhines [this message]
2014-02-18 8:50 ` [Qemu-devel] [RFC PATCH v2 05/12] rdma: accelerated memcpy() support and better external RDMA user interfaces mrhines
2014-02-18 8:50 ` [Qemu-devel] [RFC PATCH v2 06/12] mc: introduce state machine changes for MC mrhines
2014-02-19 1:00 ` Li Guang
2014-02-19 2:14 ` Michael R. Hines
2014-02-20 5:03 ` Michael R. Hines
2014-02-21 8:13 ` Michael R. Hines
2014-02-24 6:48 ` Li Guang
2014-02-26 2:52 ` Li Guang
2014-03-11 21:57 ` Juan Quintela
2014-04-04 3:50 ` Michael R. Hines
2014-02-18 8:50 ` [Qemu-devel] [RFC PATCH v2 07/12] mc: introduce additional QMP statistics for micro-checkpointing mrhines
2014-03-11 21:45 ` Eric Blake
2014-04-04 3:15 ` Michael R. Hines
2014-04-04 4:22 ` Eric Blake
2014-03-11 21:59 ` Juan Quintela
2014-04-04 3:55 ` Michael R. Hines
2014-02-18 8:50 ` [Qemu-devel] [RFC PATCH v2 08/12] mc: core logic mrhines
2014-02-19 1:07 ` Li Guang
2014-02-19 2:16 ` Michael R. Hines
2014-02-19 2:53 ` Li Guang
2014-02-19 4:27 ` Michael R. Hines
2014-02-18 8:50 ` [Qemu-devel] [RFC PATCH v2 09/12] mc: configure and makefile support mrhines
2014-02-18 8:50 ` [Qemu-devel] [RFC PATCH v2 10/12] mc: expose tunable parameter for checkpointing frequency mrhines
2014-03-11 21:49 ` Eric Blake
2014-03-11 22:15 ` Juan Quintela
2014-03-11 22:49 ` Eric Blake
2014-04-04 5:29 ` Michael R. Hines
2014-04-04 14:56 ` Eric Blake
2014-04-11 6:10 ` Michael R. Hines
2014-04-04 16:28 ` Dr. David Alan Gilbert
2014-04-04 16:35 ` Eric Blake
2014-04-04 3:29 ` Michael R. Hines
2014-02-18 8:50 ` [Qemu-devel] [RFC PATCH v2 11/12] mc: introduce new capabilities to control micro-checkpointing mrhines
2014-03-11 21:57 ` Eric Blake
2014-04-04 3:38 ` Michael R. Hines
2014-04-04 4:25 ` Eric Blake
2014-03-11 22:02 ` Juan Quintela
2014-03-11 22:07 ` Eric Blake
2014-04-04 3:57 ` Michael R. Hines
2014-04-04 3:56 ` Michael R. Hines
2014-02-18 8:50 ` [Qemu-devel] [RFC PATCH v2 12/12] mc: activate and use MC if requested mrhines
2014-02-18 9:28 ` [Qemu-devel] [RFC PATCH v2 00/12] mc: fault tolerante through micro-checkpointing Li Guang
2014-02-19 1:29 ` Michael R. Hines
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1392713429-18201-5-git-send-email-mrhines@linux.vnet.ibm.com \
--to=mrhines@linux.vnet.ibm.com \
--cc=BIRAN@il.ibm.com \
--cc=EREZH@il.ibm.com \
--cc=GILR@il.ibm.com \
--cc=SADEKJ@il.ibm.com \
--cc=abali@us.ibm.com \
--cc=dbulkow@gmail.com \
--cc=gokul@us.ibm.com \
--cc=hinesmr@cn.ibm.com \
--cc=isaku.yamahata@gmail.com \
--cc=junqing.wang@cs2c.com.cn \
--cc=lig.fnst@cn.fujitsu.com \
--cc=mrhines@us.ibm.com \
--cc=onom@us.ibm.com \
--cc=owasserm@redhat.com \
--cc=pbonzini@redhat.com \
--cc=qemu-devel@nongnu.org \
--cc=quintela@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).