qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: mrhines@linux.vnet.ibm.com
To: qemu-devel@nongnu.org
Cc: GILR@il.ibm.com, SADEKJ@il.ibm.com, pbonzini@redhat.com,
	quintela@redhat.com, EREZH@il.ibm.com, owasserm@redhat.com,
	junqing.wang@cs2c.com.cn, onom@us.ibm.com, abali@us.ibm.com,
	isaku.yamahata@gmail.com, gokul@us.ibm.com, dbulkow@gmail.com,
	hinesmr@cn.ibm.com, BIRAN@il.ibm.com, lig.fnst@cn.fujitsu.com,
	"Michael R. Hines" <mrhines@us.ibm.com>
Subject: [Qemu-devel] [RFC PATCH v2 04/12] mc: support custom page loading and copying
Date: Tue, 18 Feb 2014 16:50:21 +0800	[thread overview]
Message-ID: <1392713429-18201-5-git-send-email-mrhines@linux.vnet.ibm.com> (raw)
In-Reply-To: <1392713429-18201-1-git-send-email-mrhines@linux.vnet.ibm.com>

From: "Michael R. Hines" <mrhines@us.ibm.com>

Just as RDMA has custom routines for saving memory,
this provides RDMA with custom routines for loading
and copying memory as well.

Micro-checkpointing needs this support to avoid modifying
the arch_init.c as little as possible while stilling being
able to load RDMA-based memory from checkpoints in a
performance-optimal way as they are received from the network.

Signed-off-by: Michael R. Hines <mrhines@us.ibm.com>
---
 arch_init.c                   |  9 +++--
 include/migration/migration.h | 33 ++++++++++++++++--
 include/migration/qemu-file.h | 54 +++++++++++++++++++++++++++--
 qemu-file.c                   | 80 +++++++++++++++++++++++++++++++++++++++++--
 4 files changed, 167 insertions(+), 9 deletions(-)

diff --git a/arch_init.c b/arch_init.c
index b8364b0..db75120 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -540,7 +540,7 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
             /* In doubt sent page as normal */
             bytes_sent = -1;
             ret = ram_control_save_page(f, block->offset,
-                               offset, TARGET_PAGE_SIZE, &bytes_sent);
+                       block->host, offset, TARGET_PAGE_SIZE, &bytes_sent);
 
             if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
                 if (ret != RAM_SAVE_CONTROL_DELAYED) {
@@ -1004,13 +1004,18 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
             ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
         } else if (flags & RAM_SAVE_FLAG_PAGE) {
             void *host;
+            int r;
 
             host = host_from_stream_offset(f, addr, flags);
             if (!host) {
                 return -EINVAL;
             }
 
-            qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
+            r = ram_control_load_page(f, host, TARGET_PAGE_SIZE);
+
+            if (r == RAM_LOAD_CONTROL_NOT_SUPP) {
+                qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
+            }
         } else if (flags & RAM_SAVE_FLAG_XBZRLE) {
             void *host = host_from_stream_offset(f, addr, flags);
             if (!host) {
diff --git a/include/migration/migration.h b/include/migration/migration.h
index 9c62e2f..5c1a574 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -190,9 +190,38 @@ void ram_control_load_hook(QEMUFile *f, uint64_t flags);
 
 #define RAM_SAVE_CONTROL_NOT_SUPP -1000
 #define RAM_SAVE_CONTROL_DELAYED  -2000
+#define RAM_LOAD_CONTROL_NOT_SUPP -3000
+#define RAM_LOAD_CONTROL_DELAYED  -4000
+#define RAM_COPY_CONTROL_NOT_SUPP -5000
+#define RAM_COPY_CONTROL_DELAYED  -6000
 
-size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
-                             ram_addr_t offset, size_t size,
+#define RDMA_CONTROL_VERSION_CURRENT 1
+
+int ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
+                             uint8_t *host_addr,
+                             ram_addr_t offset, long size,
                              int *bytes_sent);
 
+int ram_control_load_page(QEMUFile *f,
+                             void *host_addr,
+                             long size);
+
+int ram_control_copy_page(QEMUFile *f, 
+                             ram_addr_t block_offset_dest,
+                             ram_addr_t offset_dest,
+                             ram_addr_t block_offset_source,
+                             ram_addr_t offset_source,
+                             long size);
+
+int migrate_use_mc(void);
+int migrate_use_mc_net(void);
+int migrate_use_mc_rdma_copy(void);
+
+#define MC_VERSION 1
+
+int mc_info_load(QEMUFile *f, void *opaque, int version_id);
+void mc_info_save(QEMUFile *f, void *opaque);
+
+void qemu_rdma_info_save(QEMUFile *f, void *opaque);
+int qemu_rdma_info_load(QEMUFile *f, void *opaque, int version_id);
 #endif
diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h
index a191fb6..c50de0d 100644
--- a/include/migration/qemu-file.h
+++ b/include/migration/qemu-file.h
@@ -71,17 +71,63 @@ typedef int (QEMURamHookFunc)(QEMUFile *f, void *opaque, uint64_t flags);
 #define RAM_CONTROL_ROUND    1
 #define RAM_CONTROL_HOOK     2
 #define RAM_CONTROL_FINISH   3
+#define RAM_CONTROL_FLUSH    4
 
 /*
  * This function allows override of where the RAM page
  * is saved (such as RDMA, for example.)
  */
-typedef size_t (QEMURamSaveFunc)(QEMUFile *f, void *opaque,
+typedef int (QEMURamSaveFunc)(QEMUFile *f, void *opaque,
                                ram_addr_t block_offset,
+                               uint8_t *host_addr,
                                ram_addr_t offset,
-                               size_t size,
+                               long size,
                                int *bytes_sent);
 
+/*
+ * This function allows override of where the RAM page
+ * is saved (such as RDMA, for example.)
+ */
+typedef int (QEMURamLoadFunc)(QEMUFile *f,
+                               void *opaque,
+                               void *host_addr,
+                               long size);
+
+/*
+ * This function allows *local* RDMA copying memory between two registered
+ * RAMBlocks, both real ones as well as private memory areas independently
+ * registered by external callers (such as MC). If RDMA is not available,
+ * then this function does nothing and the caller should just use memcpy().
+ */
+typedef int (QEMURamCopyFunc)(QEMUFile *f, void *opaque,
+                               ram_addr_t block_offset_dest,
+                               ram_addr_t offset_dest,
+                               ram_addr_t block_offset_source,
+                               ram_addr_t offset_source,
+                               long size);
+
+/* 
+ * Inform the underlying transport of a new virtual memory area.
+ * If this area is an actual RAMBlock, then pass the corresponding
+ * parameters of that block.
+ * If this area is an arbitrary virtual memory address, then
+ * pass the same value for both @host_addr and @block_offset.
+ */
+typedef int (QEMURamAddFunc)(QEMUFile *f, void *opaque,
+                               void *host_addr,
+                               ram_addr_t block_offset,
+                               uint64_t length);
+
+/* 
+ * Remove an underlying new virtual memory area.
+ * If this area is an actual RAMBlock, then pass the corresponding
+ * parameters of that block.
+ * If this area is an arbitrary virtual memory address, then
+ * pass the same value for both @host_addr and @block_offset.
+ */
+typedef int (QEMURamRemoveFunc)(QEMUFile *f, void *opaque,
+                               ram_addr_t block_offset);
+
 typedef struct QEMUFileOps {
     QEMUFilePutBufferFunc *put_buffer;
     QEMUFileGetBufferFunc *get_buffer;
@@ -92,6 +138,10 @@ typedef struct QEMUFileOps {
     QEMURamHookFunc *after_ram_iterate;
     QEMURamHookFunc *hook_ram_load;
     QEMURamSaveFunc *save_page;
+    QEMURamLoadFunc *load_page;
+    QEMURamCopyFunc *copy_page;
+    QEMURamAddFunc *add;
+    QEMURamRemoveFunc *remove;
 } QEMUFileOps;
 
 QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops);
diff --git a/qemu-file.c b/qemu-file.c
index 9473b67..3d7428f 100644
--- a/qemu-file.c
+++ b/qemu-file.c
@@ -501,14 +501,17 @@ void ram_control_load_hook(QEMUFile *f, uint64_t flags)
     }
 }
 
-size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
-                         ram_addr_t offset, size_t size, int *bytes_sent)
+int ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
+                         uint8_t *host_addr,
+                         ram_addr_t offset, long size, int *bytes_sent)
 {
     if (f->ops->save_page) {
         int ret = f->ops->save_page(f, f->opaque, block_offset,
+                                    host_addr,
                                     offset, size, bytes_sent);
 
-        if (ret != RAM_SAVE_CONTROL_DELAYED) {
+        if (ret != RAM_SAVE_CONTROL_DELAYED
+                && ret != RAM_SAVE_CONTROL_NOT_SUPP) {
             if (bytes_sent && *bytes_sent > 0) {
                 qemu_update_position(f, *bytes_sent);
             } else if (ret < 0) {
@@ -522,6 +525,77 @@ size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
     return RAM_SAVE_CONTROL_NOT_SUPP;
 }
 
+int ram_control_load_page(QEMUFile *f, void *host_addr, long size)
+{
+    if (f->ops->load_page) {
+        int ret = f->ops->load_page(f, f->opaque, host_addr, size);
+
+        if (ret != RAM_LOAD_CONTROL_DELAYED 
+                && ret != RAM_LOAD_CONTROL_NOT_SUPP) {
+            if (ret < 0) {
+                qemu_file_set_error(f, ret);
+            }
+        }
+
+        return ret;
+    }
+
+    return RAM_LOAD_CONTROL_NOT_SUPP;
+}
+
+int ram_control_copy_page(QEMUFile *f, 
+                             ram_addr_t block_offset_dest,
+                             ram_addr_t offset_dest,
+                             ram_addr_t block_offset_source,
+                             ram_addr_t offset_source,
+                             long size)
+{
+    if (f->ops->copy_page) {
+        int ret = f->ops->copy_page(f, f->opaque,
+                                    block_offset_dest,
+                                    offset_dest,
+                                    block_offset_source,
+                                    offset_source,
+                                    size);
+
+        if (ret != RAM_COPY_CONTROL_DELAYED) {
+            if (ret < 0) {
+                qemu_file_set_error(f, ret);
+            }
+        }
+
+        return ret;
+    }
+
+    return RAM_COPY_CONTROL_NOT_SUPP;
+}
+
+
+void ram_control_add(QEMUFile *f, void *host_addr,
+                         ram_addr_t block_offset, uint64_t length)
+{
+    int ret = 0;
+
+    if (f->ops->add) {
+        ret = f->ops->add(f, f->opaque, host_addr, block_offset, length);
+        if (ret < 0) {
+            qemu_file_set_error(f, ret);
+        }
+    }
+}
+
+void ram_control_remove(QEMUFile *f, ram_addr_t block_offset)
+{
+    int ret = 0;
+
+    if (f->ops->remove) {
+        ret = f->ops->remove(f, f->opaque, block_offset);
+        if (ret < 0) {
+            qemu_file_set_error(f, ret);
+        }
+    }
+}
+
 static void qemu_fill_buffer(QEMUFile *f)
 {
     int len;
-- 
1.8.1.2

  parent reply	other threads:[~2014-02-18  8:51 UTC|newest]

Thread overview: 68+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-02-18  8:50 [Qemu-devel] [RFC PATCH v2 00/12] mc: fault tolerante through micro-checkpointing mrhines
2014-02-18  8:50 ` [Qemu-devel] [RFC PATCH v2 01/12] mc: add documentation for micro-checkpointing mrhines
2014-02-18 12:45   ` Dr. David Alan Gilbert
2014-02-19  1:40     ` Michael R. Hines
2014-02-19 11:27       ` Dr. David Alan Gilbert
2014-02-20  1:17         ` Michael R. Hines
2014-02-20 10:09           ` Dr. David Alan Gilbert
2014-02-20 11:14             ` Li Guang
2014-02-20 14:58               ` Michael R. Hines
2014-02-20 14:57             ` Michael R. Hines
2014-02-20 16:32               ` Dr. David Alan Gilbert
2014-02-21  4:54                 ` Michael R. Hines
2014-02-21  9:44                   ` Dr. David Alan Gilbert
2014-03-03  6:08                     ` Michael R. Hines
2014-02-18  8:50 ` [Qemu-devel] [RFC PATCH v2 02/12] mc: timestamp migration_bitmap and KVM logdirty usage mrhines
2014-02-18 10:32   ` Dr. David Alan Gilbert
2014-02-19  1:42     ` Michael R. Hines
2014-03-11 21:31   ` Juan Quintela
2014-04-04  3:08     ` Michael R. Hines
2014-02-18  8:50 ` [Qemu-devel] [RFC PATCH v2 03/12] mc: introduce a 'checkpointing' status check into the VCPU states mrhines
2014-03-11 21:36   ` Juan Quintela
2014-04-04  3:11     ` Michael R. Hines
2014-03-11 21:40   ` Eric Blake
2014-04-04  3:12     ` Michael R. Hines
2014-02-18  8:50 ` mrhines [this message]
2014-02-18  8:50 ` [Qemu-devel] [RFC PATCH v2 05/12] rdma: accelerated memcpy() support and better external RDMA user interfaces mrhines
2014-02-18  8:50 ` [Qemu-devel] [RFC PATCH v2 06/12] mc: introduce state machine changes for MC mrhines
2014-02-19  1:00   ` Li Guang
2014-02-19  2:14     ` Michael R. Hines
2014-02-20  5:03     ` Michael R. Hines
2014-02-21  8:13     ` Michael R. Hines
2014-02-24  6:48       ` Li Guang
2014-02-26  2:52         ` Li Guang
2014-03-11 21:57   ` Juan Quintela
2014-04-04  3:50     ` Michael R. Hines
2014-02-18  8:50 ` [Qemu-devel] [RFC PATCH v2 07/12] mc: introduce additional QMP statistics for micro-checkpointing mrhines
2014-03-11 21:45   ` Eric Blake
2014-04-04  3:15     ` Michael R. Hines
2014-04-04  4:22       ` Eric Blake
2014-03-11 21:59   ` Juan Quintela
2014-04-04  3:55     ` Michael R. Hines
2014-02-18  8:50 ` [Qemu-devel] [RFC PATCH v2 08/12] mc: core logic mrhines
2014-02-19  1:07   ` Li Guang
2014-02-19  2:16     ` Michael R. Hines
2014-02-19  2:53       ` Li Guang
2014-02-19  4:27         ` Michael R. Hines
2014-02-18  8:50 ` [Qemu-devel] [RFC PATCH v2 09/12] mc: configure and makefile support mrhines
2014-02-18  8:50 ` [Qemu-devel] [RFC PATCH v2 10/12] mc: expose tunable parameter for checkpointing frequency mrhines
2014-03-11 21:49   ` Eric Blake
2014-03-11 22:15     ` Juan Quintela
2014-03-11 22:49       ` Eric Blake
2014-04-04  5:29         ` Michael R. Hines
2014-04-04 14:56           ` Eric Blake
2014-04-11  6:10             ` Michael R. Hines
2014-04-04 16:28           ` Dr. David Alan Gilbert
2014-04-04 16:35             ` Eric Blake
2014-04-04  3:29     ` Michael R. Hines
2014-02-18  8:50 ` [Qemu-devel] [RFC PATCH v2 11/12] mc: introduce new capabilities to control micro-checkpointing mrhines
2014-03-11 21:57   ` Eric Blake
2014-04-04  3:38     ` Michael R. Hines
2014-04-04  4:25       ` Eric Blake
2014-03-11 22:02   ` Juan Quintela
2014-03-11 22:07     ` Eric Blake
2014-04-04  3:57       ` Michael R. Hines
2014-04-04  3:56     ` Michael R. Hines
2014-02-18  8:50 ` [Qemu-devel] [RFC PATCH v2 12/12] mc: activate and use MC if requested mrhines
2014-02-18  9:28 ` [Qemu-devel] [RFC PATCH v2 00/12] mc: fault tolerante through micro-checkpointing Li Guang
2014-02-19  1:29   ` Michael R. Hines

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1392713429-18201-5-git-send-email-mrhines@linux.vnet.ibm.com \
    --to=mrhines@linux.vnet.ibm.com \
    --cc=BIRAN@il.ibm.com \
    --cc=EREZH@il.ibm.com \
    --cc=GILR@il.ibm.com \
    --cc=SADEKJ@il.ibm.com \
    --cc=abali@us.ibm.com \
    --cc=dbulkow@gmail.com \
    --cc=gokul@us.ibm.com \
    --cc=hinesmr@cn.ibm.com \
    --cc=isaku.yamahata@gmail.com \
    --cc=junqing.wang@cs2c.com.cn \
    --cc=lig.fnst@cn.fujitsu.com \
    --cc=mrhines@us.ibm.com \
    --cc=onom@us.ibm.com \
    --cc=owasserm@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=quintela@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).