From: Anthony Liguori <aliguori@us.ibm.com>
To: qemu-devel@nongnu.org
Cc: Chris Wright <chrisw@redhat.com>,
Uri Lublin <uri.lublin@qumranet.com>,
Anthony Liguori <aliguori@us.ibm.com>,
kvm@vger.kernel.org
Subject: [Qemu-devel] [PATCH 7/10] Switch the memory savevm handler to be "live"
Date: Tue, 9 Sep 2008 14:49:59 -0500 [thread overview]
Message-ID: <1220989802-13706-8-git-send-email-aliguori@us.ibm.com> (raw)
In-Reply-To: <1220989802-13706-1-git-send-email-aliguori@us.ibm.com>
This patch replaces the static memory savevm/loadvm handler with a "live" one.
This handler is used even if performing a non-live migration.
The key difference between this handler and the previous is that each page is
prefixed with the address of the page. The QEMUFile rate limiting code, in
combination with the live migration dirty tracking bits, is used to determine
which pages should be sent and how many should be sent.
The live save code "converges" when the number of dirty pages drops below a
fixed threshold. Currently, this is 10 pages. This is something that should
eventually be derived from whatever the bandwidth limitation is.
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
diff --git a/vl.c b/vl.c
index db87e6f..a55ccb4 100644
--- a/vl.c
+++ b/vl.c
@@ -7293,61 +7293,143 @@ static void ram_decompress_close(RamDecompressState *s)
inflateEnd(&s->zstream);
}
-static void ram_save(QEMUFile *f, void *opaque)
+#define RAM_SAVE_FLAG_FULL 0x01 /* set by ram_save_dead(); ram_load() hands the stream to ram_load_dead() */
+#define RAM_SAVE_FLAG_COMPRESS 0x02 /* page is one repeated byte; only that byte follows the header word */
+#define RAM_SAVE_FLAG_MEM_SIZE 0x04 /* header word carries phys_ram_size; ram_load() rejects a mismatch */
+#define RAM_SAVE_FLAG_PAGE 0x08 /* TARGET_PAGE_SIZE bytes of page data follow the header word */
+#define RAM_SAVE_FLAG_EOS 0x10 /* end of section; terminates the ram_load() loop */
+
+static void ram_save_dead(QEMUFile *f, void *opaque) /* Writes all of guest RAM to f in one pass through the ram_compress_* helpers. */
{
- ram_addr_t i;
RamCompressState s1, *s = &s1;
uint8_t buf[10];
+ ram_addr_t i;
+
+ qemu_put_be64(f, phys_ram_size | RAM_SAVE_FLAG_FULL | RAM_SAVE_FLAG_MEM_SIZE); /* header word: RAM size with the FULL and MEM_SIZE flags OR-ed in */
- qemu_put_be32(f, phys_ram_size);
if (ram_compress_open(s, f) < 0)
return;
for(i = 0; i < phys_ram_size; i+= BDRV_HASH_BLOCK_SIZE) {
-#if 0
- if (tight_savevm_enabled) {
- int64_t sector_num;
- int j;
-
- /* find if the memory block is available on a virtual
- block device */
- sector_num = -1;
- for(j = 0; j < nb_drives; j++) {
- sector_num = bdrv_hash_find(drives_table[j].bdrv,
- phys_ram_base + i,
- BDRV_HASH_BLOCK_SIZE);
- if (sector_num >= 0)
- break;
+ // normal_compress:
+ buf[0] = 0; /* one-byte block header; 0 is the only type still written */
+ ram_compress_buf(s, buf, 1);
+ ram_compress_buf(s, phys_ram_base + i, BDRV_HASH_BLOCK_SIZE); /* block contents follow the header byte */
+ }
+ ram_compress_close(s);
+}
+
+/*
+ * Report whether a page consists entirely of one repeated byte.
+ *
+ * page: start of a TARGET_PAGE_SIZE-byte page; it is read as 32-bit words,
+ *       so it must be suitably aligned for uint32_t access
+ * ch:   the byte value every position is compared against
+ *
+ * Returns 1 if every byte of the page equals ch, 0 otherwise.
+ */
+static int is_dup_page(uint8_t *page, uint8_t ch)
+{
+    /*
+     * Replicate ch into all four bytes of a word using unsigned arithmetic.
+     * Writing "ch << 24" would promote ch to int and shift a one into the
+     * sign bit whenever ch >= 0x80, which is undefined behaviour.
+     */
+    uint32_t val = (uint32_t)ch * 0x01010101u;
+    uint32_t *array = (uint32_t *)page;
+    int i;
+
+    for (i = 0; i < (TARGET_PAGE_SIZE / 4); i++) {
+        if (array[i] != val)
+            return 0;
+    }
+
+    return 1;
+}
+
+static int ram_save_block(QEMUFile *f) /* Writes at most one dirty page to f; returns 1 if a page was written, 0 if a full scan found none. */
+{
+ static ram_addr_t current_addr = 0; /* scan cursor; static, so it persists across calls */
+ ram_addr_t saved_addr = current_addr;
+ ram_addr_t addr = 0; /* bytes scanned so far in this call */
+ int found = 0;
+
+ while (addr < phys_ram_size) {
+ if (cpu_physical_memory_get_dirty(current_addr, MIGRATION_DIRTY_FLAG)) {
+ uint8_t ch;
+
+ cpu_physical_memory_reset_dirty(current_addr,
+ current_addr + TARGET_PAGE_SIZE,
+ MIGRATION_DIRTY_FLAG); /* clear the migration dirty bit before the page is written out */
+
+ ch = *(phys_ram_base + current_addr); /* first byte of the page, the candidate fill byte */
+
+ if (is_dup_page(phys_ram_base + current_addr, ch)) {
+ qemu_put_be64(f, current_addr | RAM_SAVE_FLAG_COMPRESS); /* uniform page: send address word plus the single byte */
+ qemu_put_byte(f, ch);
+ } else {
+ qemu_put_be64(f, current_addr | RAM_SAVE_FLAG_PAGE); /* otherwise send address word plus the full page */
+ qemu_put_buffer(f, phys_ram_base + current_addr, TARGET_PAGE_SIZE);
}
- if (j == nb_drives)
- goto normal_compress;
- buf[0] = 1;
- buf[1] = j;
- cpu_to_be64wu((uint64_t *)(buf + 2), sector_num);
- ram_compress_buf(s, buf, 10);
- } else
-#endif
- {
- // normal_compress:
- buf[0] = 0;
- ram_compress_buf(s, buf, 1);
- ram_compress_buf(s, phys_ram_base + i, BDRV_HASH_BLOCK_SIZE);
+
+ found = 1;
+ break; /* cursor stays on this page; its dirty bit is already cleared */
}
+ addr += TARGET_PAGE_SIZE;
+ current_addr = (saved_addr + addr) % phys_ram_size; /* advance the cursor, wrapping to 0 at the end of RAM */
}
- ram_compress_close(s);
+
+ return found;
}
-static int ram_load(QEMUFile *f, void *opaque, int version_id)
+static ram_addr_t ram_save_threshold = 10;
+
+/* Count the pages that currently have MIGRATION_DIRTY_FLAG set. */
+static ram_addr_t ram_save_remaining(void)
+{
+    ram_addr_t dirty_pages = 0;
+    ram_addr_t offset = 0;
+
+    /* Walk guest RAM one page at a time. */
+    while (offset < phys_ram_size) {
+        if (cpu_physical_memory_get_dirty(offset, MIGRATION_DIRTY_FLAG)) {
+            dirty_pages++;
+        }
+        offset += TARGET_PAGE_SIZE;
+    }
+
+    return dirty_pages;
+}
+
+/*
+ * Live savevm handler for guest RAM.
+ *
+ * f:      stream the pages are written to
+ * stage:  1 performs the one-time setup (mark every page dirty, enable
+ *         dirty tracking, write the RAM-size header); 3 disables dirty
+ *         tracking and flushes every remaining dirty page regardless of
+ *         rate limiting; any stage first sends pages until the stream's
+ *         rate limit is hit or no dirty page is left
+ * opaque: unused
+ *
+ * Every call ends its output with a RAM_SAVE_FLAG_EOS word.
+ *
+ * Returns non-zero only in stage 2, and only when fewer than
+ * ram_save_threshold pages are still dirty; otherwise returns 0.
+ */
+static int ram_save_live(QEMUFile *f, int stage, void *opaque)
+{
+    ram_addr_t addr;
+
+    /* FIXME handling !vm_running && stage == 3 specially */
+
+    if (stage == 1) {
+        /* Make sure all dirty bits are set */
+        for (addr = 0; addr < phys_ram_size; addr += TARGET_PAGE_SIZE) {
+            if (!cpu_physical_memory_get_dirty(addr, MIGRATION_DIRTY_FLAG))
+                cpu_physical_memory_set_dirty(addr);
+        }
+
+        /* Enable dirty memory tracking */
+        cpu_physical_memory_set_dirty_tracking(1);
+
+        qemu_put_be64(f, phys_ram_size | RAM_SAVE_FLAG_MEM_SIZE);
+    }
+
+    /* try transferring iterative blocks of memory */
+    while (!qemu_file_rate_limit(f)) {
+        if (ram_save_block(f) == 0) /* no more blocks */
+            break;
+    }
+
+    if (stage == 3) {
+        cpu_physical_memory_set_dirty_tracking(0);
+
+        /* flush all remaining blocks regardless of rate limiting */
+        while (ram_save_block(f) != 0) {
+            /* ram_save_block() does all the work */
+        }
+    }
+
+    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
+
+    return (stage == 2) && (ram_save_remaining() < ram_save_threshold);
+}
+
+static int ram_load_dead(QEMUFile *f, void *opaque)
{
RamDecompressState s1, *s = &s1;
uint8_t buf[10];
ram_addr_t i;
- if (version_id == 1)
- return ram_load_v1(f, opaque);
- if (version_id != 2)
- return -EINVAL;
- if (qemu_get_be32(f) != phys_ram_size)
- return -EINVAL;
if (ram_decompress_open(s, f) < 0)
return -EINVAL;
for(i = 0; i < phys_ram_size; i+= BDRV_HASH_BLOCK_SIZE) {
@@ -7360,35 +7442,57 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
fprintf(stderr, "Error while reading ram block address=0x%08" PRIx64, (uint64_t)i);
goto error;
}
- } else
-#if 0
- if (buf[0] == 1) {
- int bs_index;
- int64_t sector_num;
-
- ram_decompress_buf(s, buf + 1, 9);
- bs_index = buf[1];
- sector_num = be64_to_cpupu((const uint64_t *)(buf + 2));
- if (bs_index >= nb_drives) {
- fprintf(stderr, "Invalid block device index %d\n", bs_index);
- goto error;
- }
- if (bdrv_read(drives_table[bs_index].bdrv, sector_num,
- phys_ram_base + i,
- BDRV_HASH_BLOCK_SIZE / 512) < 0) {
- fprintf(stderr, "Error while reading sector %d:%" PRId64 "\n",
- bs_index, sector_num);
- goto error;
- }
- } else
-#endif
- {
+ } else {
error:
printf("Error block header\n");
return -EINVAL;
}
}
ram_decompress_close(s);
+
+ return 0;
+}
+
+/*
+ * Load guest RAM from a saved stream.
+ *
+ * f:          stream to read from
+ * opaque:     passed through to the version-specific loaders
+ * version_id: 1 is handled by ram_load_v1(); 2 is a 32-bit RAM size
+ *             followed by the ram_load_dead() format; 3 is a sequence of
+ *             64-bit header words whose low bits are RAM_SAVE_FLAG_* and
+ *             whose page-aligned part is an address or the RAM size
+ *
+ * Returns 0 on success, or -EINVAL for an unknown version, a RAM size
+ * mismatch, a ram_load_dead() failure, or a page address outside guest RAM.
+ * The version 1 path returns whatever ram_load_v1() returns.
+ *
+ * NOTE(review): the version 3 loop only stops on RAM_SAVE_FLAG_EOS; how a
+ * truncated stream is reported by qemu_get_be64() is not visible here and
+ * should be confirmed.
+ */
+static int ram_load(QEMUFile *f, void *opaque, int version_id)
+{
+    ram_addr_t addr;
+    int flags;
+
+    if (version_id == 1)
+        return ram_load_v1(f, opaque);
+
+    if (version_id == 2) {
+        if (qemu_get_be32(f) != phys_ram_size)
+            return -EINVAL;
+        return ram_load_dead(f, opaque);
+    }
+
+    if (version_id != 3)
+        return -EINVAL;
+
+    do {
+        addr = qemu_get_be64(f);
+
+        /* The flags live in the bits below the page mask. */
+        flags = addr & ~TARGET_PAGE_MASK;
+        addr &= TARGET_PAGE_MASK;
+
+        if (flags & RAM_SAVE_FLAG_MEM_SIZE) {
+            if (addr != phys_ram_size)
+                return -EINVAL;
+        }
+
+        if (flags & RAM_SAVE_FLAG_FULL) {
+            if (ram_load_dead(f, opaque) < 0)
+                return -EINVAL;
+        }
+
+        if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE)) {
+            /*
+             * The address comes straight from the stream; refuse any page
+             * that does not lie entirely inside guest RAM rather than
+             * writing past the end of phys_ram_base.
+             */
+            if (addr >= phys_ram_size ||
+                phys_ram_size - addr < TARGET_PAGE_SIZE)
+                return -EINVAL;
+        }
+
+        if (flags & RAM_SAVE_FLAG_COMPRESS) {
+            uint8_t ch = qemu_get_byte(f);
+            memset(phys_ram_base + addr, ch, TARGET_PAGE_SIZE);
+        } else if (flags & RAM_SAVE_FLAG_PAGE)
+            qemu_get_buffer(f, phys_ram_base + addr, TARGET_PAGE_SIZE);
+    } while (!(flags & RAM_SAVE_FLAG_EOS));
+
return 0;
}
@@ -9354,7 +9458,7 @@ int main(int argc, char **argv)
exit(1);
register_savevm("timer", 0, 2, timer_save, timer_load, NULL);
- register_savevm("ram", 0, 2, ram_save, ram_load, NULL);
+ register_savevm_live("ram", 0, 3, ram_save_live, NULL, ram_load, NULL);
/* terminal init */
memset(&display_state, 0, sizeof(display_state));
next prev parent reply other threads:[~2008-09-09 19:51 UTC|newest]
Thread overview: 57+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-09-09 19:49 [Qemu-devel] [PATCH 0/10] Live migration for QEMU Anthony Liguori
2008-09-09 19:49 ` [Qemu-devel] [PATCH 1/10] Refactor QEMUFile for live migration Anthony Liguori
2008-09-10 13:25 ` Chris Lalancette
2008-09-10 14:38 ` [Qemu-devel] " Glauber Costa
2008-09-10 15:05 ` Avi Kivity
2008-09-10 15:16 ` Anthony Liguori
2008-09-12 15:40 ` [Qemu-devel] " Blue Swirl
2008-09-09 19:49 ` [Qemu-devel] [PATCH 2/10] Allow the monitor to be suspended during non-blocking op Anthony Liguori
2008-09-10 6:52 ` Avi Kivity
2008-09-10 10:05 ` Daniel P. Berrange
2008-09-10 11:11 ` Avi Kivity
2008-09-10 11:14 ` Daniel P. Berrange
2008-09-10 15:36 ` Avi Kivity
2008-09-10 15:40 ` Anthony Liguori
2008-09-10 15:58 ` Jamie Lokier
2008-09-11 10:16 ` Avi Kivity
2008-09-11 11:59 ` Jamie Lokier
2008-09-10 13:07 ` Anthony Liguori
2008-09-10 13:26 ` Chris Lalancette
2008-09-10 10:01 ` Daniel P. Berrange
2008-09-10 13:11 ` Anthony Liguori
2008-09-09 19:49 ` [Qemu-devel] [PATCH 3/10] Add bdrv_flush_all() Anthony Liguori
2008-09-10 13:26 ` Chris Lalancette
2008-09-10 14:46 ` Glauber Costa
2008-09-10 15:19 ` Anthony Liguori
2008-09-10 15:32 ` Glauber Costa
2008-09-10 15:39 ` Avi Kivity
2008-09-10 16:37 ` Paul Brook
2008-09-12 15:43 ` Blue Swirl
2008-09-09 19:49 ` [Qemu-devel] [PATCH 4/10] Add dirty tracking for live migration Anthony Liguori
2008-09-10 14:52 ` Glauber Costa
2008-09-10 14:56 ` Anthony Liguori
2008-09-10 15:01 ` Glauber Costa
2008-09-09 19:49 ` [Qemu-devel] [PATCH 5/10] Add network announce function Anthony Liguori
2008-09-10 13:27 ` Chris Lalancette
2008-09-10 13:54 ` Anthony Liguori
2008-09-10 14:00 ` Avi Kivity
2008-09-09 19:49 ` [Qemu-devel] [PATCH 6/10] Introduce v3 of savevm protocol Anthony Liguori
2008-09-10 7:09 ` Avi Kivity
2008-09-09 19:49 ` Anthony Liguori [this message]
2008-09-09 22:25 ` [Qemu-devel] [PATCH 7/10] Switch the memory savevm handler to be "live" Jamie Lokier
2008-09-09 22:49 ` Anthony Liguori
2008-09-10 7:17 ` Avi Kivity
2008-09-10 13:10 ` Anthony Liguori
2008-09-09 19:50 ` [Qemu-devel] [PATCH 8/10] Introduce a buffered QEMUFile wrapper Anthony Liguori
2008-09-12 15:16 ` Blue Swirl
2008-09-09 19:50 ` [Qemu-devel] [PATCH 9/10] Introduce the UI components for live migration Anthony Liguori
2008-09-09 19:50 ` [Qemu-devel] [PATCH 10/10] TCP based " Anthony Liguori
2008-09-10 16:46 ` Blue Swirl
2008-09-10 16:51 ` Anthony Liguori
2008-09-11 12:13 ` [Qemu-devel] [PATCH 0/10] Live migration for QEMU Atsushi SAKAI
2008-09-11 13:06 ` Anthony Liguori
2008-09-11 13:30 ` Jamie Lokier
2008-09-11 14:12 ` Anthony Liguori
2008-09-11 15:32 ` Avi Kivity
2008-09-11 16:22 ` Anthony Liguori
2008-09-11 16:32 ` Avi Kivity
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1220989802-13706-8-git-send-email-aliguori@us.ibm.com \
--to=aliguori@us.ibm.com \
--cc=chrisw@redhat.com \
--cc=kvm@vger.kernel.org \
--cc=qemu-devel@nongnu.org \
--cc=uri.lublin@qumranet.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).