[Qemu-devel] [PULL 04/14] add 'release-ram' migrate capability

qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed

From: "Dr. David Alan Gilbert (git)" <dgilbert@redhat.com>
To: qemu-devel@nongnu.org
Cc: quintela@redhat.com, ashijeetacharya@gmail.com, amit@kernel.org,
	pbutsykin@virtuozzo.com, zhang.zhanghailiang@huawei.com
Subject: [Qemu-devel] [PULL 04/14] add 'release-ram' migrate capability
Date: Mon, 13 Feb 2017 17:50:23 +0000	[thread overview]
Message-ID: <20170213175033.7314-5-dgilbert@redhat.com> (raw)
In-Reply-To: <20170213175033.7314-1-dgilbert@redhat.com>

From: Pavel Butsykin <pbutsykin@virtuozzo.com>

This feature frees the migrated memory on the source during postcopy-ram
migration. In the second step of postcopy-ram migration when the source vm
is put on pause we can free unnecessary memory. It will allow, in particular,
to start relaxing the memory stress on the source host in a load-balancing
scenario.

Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
Message-Id: <20170203152321.19739-3-pbutsykin@virtuozzo.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
   Manually merged in Pavel's 'migration: madvise error_report fixup!'
---
 include/migration/migration.h |  1 +
 include/migration/qemu-file.h |  3 ++-
 migration/migration.c         |  9 +++++++
 migration/qemu-file.c         | 59 ++++++++++++++++++++++++++++++++++++++-----
 migration/ram.c               | 22 +++++++++++++++-
 qapi-schema.json              |  5 +++-
 6 files changed, 89 insertions(+), 10 deletions(-)

diff --git a/include/migration/migration.h b/include/migration/migration.h
index 7528cc2..b9b706a 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -304,6 +304,7 @@ int migrate_add_blocker(Error *reason, Error **errp);
  */
 void migrate_del_blocker(Error *reason);
 
+bool migrate_release_ram(void);
 bool migrate_postcopy_ram(void);
 bool migrate_zero_blocks(void);
 
diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h
index abedd46..0cd648a 100644
--- a/include/migration/qemu-file.h
+++ b/include/migration/qemu-file.h
@@ -132,7 +132,8 @@ void qemu_put_byte(QEMUFile *f, int v);
  * put_buffer without copying the buffer.
  * The buffer should be available till it is sent asynchronously.
  */
-void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, size_t size);
+void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, size_t size,
+                           bool may_free);
 bool qemu_file_mode_is_not_valid(const char *mode);
 bool qemu_file_is_writable(QEMUFile *f);
 
diff --git a/migration/migration.c b/migration/migration.c
index 2b179c6..68afc07 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1297,6 +1297,15 @@ void qmp_migrate_set_downtime(double value, Error **errp)
     qmp_migrate_set_parameters(&p, errp);
 }
 
+bool migrate_release_ram(void)
+{
+    MigrationState *s;
+
+    s = migrate_get_current();
+
+    return s->enabled_capabilities[MIGRATION_CAPABILITY_RELEASE_RAM];
+}
+
 bool migrate_postcopy_ram(void)
 {
     MigrationState *s;
diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index e9fae31..195fa94 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -49,6 +49,7 @@ struct QEMUFile {
     int buf_size; /* 0 when writing */
     uint8_t buf[IO_BUF_SIZE];
 
+    DECLARE_BITMAP(may_free, MAX_IOV_SIZE);
     struct iovec iov[MAX_IOV_SIZE];
     unsigned int iovcnt;
 
@@ -132,6 +133,41 @@ bool qemu_file_is_writable(QEMUFile *f)
     return f->ops->writev_buffer;
 }
 
+static void qemu_iovec_release_ram(QEMUFile *f)
+{
+    struct iovec iov;
+    unsigned long idx;
+
+    /* Find and release all the contiguous memory ranges marked as may_free. */
+    idx = find_next_bit(f->may_free, f->iovcnt, 0);
+    if (idx >= f->iovcnt) {
+        return;
+    }
+    iov = f->iov[idx];
+
+    /* The madvise() in the loop is called for iov within a continuous range and
+     * then reinitialize the iov. And in the end, madvise() is called for the
+     * last iov.
+     */
+    while ((idx = find_next_bit(f->may_free, f->iovcnt, idx + 1)) < f->iovcnt) {
+        /* check for adjacent buffer and coalesce them */
+        if (iov.iov_base + iov.iov_len == f->iov[idx].iov_base) {
+            iov.iov_len += f->iov[idx].iov_len;
+            continue;
+        }
+        if (qemu_madvise(iov.iov_base, iov.iov_len, QEMU_MADV_DONTNEED) < 0) {
+            error_report("migrate: madvise DONTNEED failed %p %zd: %s",
+                         iov.iov_base, iov.iov_len, strerror(errno));
+        }
+        iov = f->iov[idx];
+    }
+    if (qemu_madvise(iov.iov_base, iov.iov_len, QEMU_MADV_DONTNEED) < 0) {
+            error_report("migrate: madvise DONTNEED failed %p %zd: %s",
+                         iov.iov_base, iov.iov_len, strerror(errno));
+    }
+    memset(f->may_free, 0, sizeof(f->may_free));
+}
+
 /**
  * Flushes QEMUFile buffer
  *
@@ -151,6 +187,8 @@ void qemu_fflush(QEMUFile *f)
     if (f->iovcnt > 0) {
         expect = iov_size(f->iov, f->iovcnt);
         ret = f->ops->writev_buffer(f->opaque, f->iov, f->iovcnt, f->pos);
+
+        qemu_iovec_release_ram(f);
     }
 
     if (ret >= 0) {
@@ -304,13 +342,19 @@ int qemu_fclose(QEMUFile *f)
     return ret;
 }
 
-static void add_to_iovec(QEMUFile *f, const uint8_t *buf, size_t size)
+static void add_to_iovec(QEMUFile *f, const uint8_t *buf, size_t size,
+                         bool may_free)
 {
     /* check for adjacent buffer and coalesce them */
     if (f->iovcnt > 0 && buf == f->iov[f->iovcnt - 1].iov_base +
-        f->iov[f->iovcnt - 1].iov_len) {
+        f->iov[f->iovcnt - 1].iov_len &&
+        may_free == test_bit(f->iovcnt - 1, f->may_free))
+    {
         f->iov[f->iovcnt - 1].iov_len += size;
     } else {
+        if (may_free) {
+            set_bit(f->iovcnt, f->may_free);
+        }
         f->iov[f->iovcnt].iov_base = (uint8_t *)buf;
         f->iov[f->iovcnt++].iov_len = size;
     }
@@ -320,14 +364,15 @@ static void add_to_iovec(QEMUFile *f, const uint8_t *buf, size_t size)
     }
 }
 
-void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, size_t size)
+void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, size_t size,
+                           bool may_free)
 {
     if (f->last_error) {
         return;
     }
 
     f->bytes_xfer += size;
-    add_to_iovec(f, buf, size);
+    add_to_iovec(f, buf, size, may_free);
 }
 
 void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, size_t size)
@@ -345,7 +390,7 @@ void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, size_t size)
         }
         memcpy(f->buf + f->buf_index, buf, l);
         f->bytes_xfer += l;
-        add_to_iovec(f, f->buf + f->buf_index, l);
+        add_to_iovec(f, f->buf + f->buf_index, l, false);
         f->buf_index += l;
         if (f->buf_index == IO_BUF_SIZE) {
             qemu_fflush(f);
@@ -366,7 +411,7 @@ void qemu_put_byte(QEMUFile *f, int v)
 
     f->buf[f->buf_index] = v;
     f->bytes_xfer++;
-    add_to_iovec(f, f->buf + f->buf_index, 1);
+    add_to_iovec(f, f->buf + f->buf_index, 1, false);
     f->buf_index++;
     if (f->buf_index == IO_BUF_SIZE) {
         qemu_fflush(f);
@@ -647,7 +692,7 @@ ssize_t qemu_put_compression_data(QEMUFile *f, const uint8_t *p, size_t size,
     }
     qemu_put_be32(f, blen);
     if (f->ops->writev_buffer) {
-        add_to_iovec(f, f->buf + f->buf_index, blen);
+        add_to_iovec(f, f->buf + f->buf_index, blen, false);
     }
     f->buf_index += blen;
     if (f->buf_index == IO_BUF_SIZE) {
diff --git a/migration/ram.c b/migration/ram.c
index 91443b3..c22209d 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -705,6 +705,16 @@ static int save_zero_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
     return pages;
 }
 
+static void ram_release_pages(MigrationState *ms, const char *block_name,
+                              uint64_t offset, int pages)
+{
+    if (!migrate_release_ram() || !migration_in_postcopy(ms)) {
+        return;
+    }
+
+    ram_discard_range(NULL, block_name, offset, pages << TARGET_PAGE_BITS);
+}
+
 /**
  * ram_save_page: Send the given page to the stream
  *
@@ -765,6 +775,7 @@ static int ram_save_page(MigrationState *ms, QEMUFile *f, PageSearchStatus *pss,
              * page would be stale
              */
             xbzrle_cache_zero_page(current_addr);
+            ram_release_pages(ms, block->idstr, pss->offset, pages);
         } else if (!ram_bulk_stage &&
                    !migration_in_postcopy(ms) && migrate_use_xbzrle()) {
             pages = save_xbzrle_page(f, &p, current_addr, block,
@@ -783,7 +794,9 @@ static int ram_save_page(MigrationState *ms, QEMUFile *f, PageSearchStatus *pss,
         *bytes_transferred += save_page_header(f, block,
                                                offset | RAM_SAVE_FLAG_PAGE);
         if (send_async) {
-            qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
+            qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE,
+                                  migrate_release_ram() &
+                                  migration_in_postcopy(ms));
         } else {
             qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
         }
@@ -813,6 +826,8 @@ static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
         error_report("compressed data failed!");
     } else {
         bytes_sent += blen;
+        ram_release_pages(migrate_get_current(), block->idstr,
+                          offset & TARGET_PAGE_MASK, 1);
     }
 
     return bytes_sent;
@@ -952,12 +967,17 @@ static int ram_save_compressed_page(MigrationState *ms, QEMUFile *f,
                     error_report("compressed data failed!");
                 }
             }
+            if (pages > 0) {
+                ram_release_pages(ms, block->idstr, pss->offset, pages);
+            }
         } else {
             offset |= RAM_SAVE_FLAG_CONTINUE;
             pages = save_zero_page(f, block, offset, p, bytes_transferred);
             if (pages == -1) {
                 pages = compress_page_with_multi_thread(f, block, offset,
                                                         bytes_transferred);
+            } else {
+                ram_release_pages(ms, block->idstr, pss->offset, pages);
             }
         }
     }
diff --git a/qapi-schema.json b/qapi-schema.json
index 61151f3..9330541 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -865,11 +865,14 @@
 #        side, this process is called COarse-Grain LOck Stepping (COLO) for
 #        Non-stop Service. (since 2.8)
 #
+# @release-ram: if enabled, qemu will free the migrated ram pages on the source
+#        during postcopy-ram migration. (since 2.9)
+#
 # Since: 1.2
 ##
 { 'enum': 'MigrationCapability',
   'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks',
-           'compress', 'events', 'postcopy-ram', 'x-colo'] }
+           'compress', 'events', 'postcopy-ram', 'x-colo', 'release-ram'] }
 
 ##
 # @MigrationCapabilityStatus:
-- 
2.9.3

next prev parent reply	other threads:[~2017-02-13 17:50 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-02-13 17:50 [Qemu-devel] [PULL 00/14] migration queue Dr. David Alan Gilbert (git)
2017-02-13 17:50 ` [Qemu-devel] [PULL 01/14] migration: remove myself as maintainer Dr. David Alan Gilbert (git)
2017-02-13 17:50 ` [Qemu-devel] [PULL 02/14] MAINTAINERS: update my email address Dr. David Alan Gilbert (git)
2017-02-13 17:50 ` [Qemu-devel] [PULL 03/14] migration: add MigrationState arg for ram_save_/compressed_/page() Dr. David Alan Gilbert (git)
2017-02-13 17:50 ` Dr. David Alan Gilbert (git) [this message]
2017-02-13 17:50 ` [Qemu-devel] [PULL 05/14] migration: discard non-dirty ram pages after the start of postcopy Dr. David Alan Gilbert (git)
2017-02-13 17:50 ` [Qemu-devel] [PULL 06/14] migrate: Introduce zero RAM checks to skip RAM migration Dr. David Alan Gilbert (git)
2017-02-13 17:50 ` [Qemu-devel] [PULL 07/14] migration: consolidate VMStateField.start Dr. David Alan Gilbert (git)
2017-02-13 17:50 ` [Qemu-devel] [PULL 08/14] COLO: fix setting checkpoint-delay not working properly Dr. David Alan Gilbert (git)
2017-02-13 17:50 ` [Qemu-devel] [PULL 09/14] COLO: Shutdown related socket fd while do failover Dr. David Alan Gilbert (git)
2017-02-13 17:50 ` [Qemu-devel] [PULL 10/14] COLO: Don't process failover request while loading VM's state Dr. David Alan Gilbert (git)
2017-02-13 17:50 ` [Qemu-devel] [PULL 11/14] migration: Add VMSTATE_UNUSED_VARRAY_UINT32 Dr. David Alan Gilbert (git)
2017-02-13 17:50 ` [Qemu-devel] [PULL 12/14] migration: Add VMSTATE_WITH_TMP Dr. David Alan Gilbert (git)
2017-02-13 17:50 ` [Qemu-devel] [PULL 13/14] tests/migration: Add test for VMSTATE_WITH_TMP Dr. David Alan Gilbert (git)
2017-02-13 17:50 ` [Qemu-devel] [PULL 14/14] virtio/migration: Migrate virtio-net to VMState Dr. David Alan Gilbert (git)
2017-02-14  9:52 ` [Qemu-devel] [PULL 00/14] migration queue Peter Maydell

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:7528cc2 dfblob:b9b706a dfblob:abedd46 dfblob:0cd648a
dfblob:2b179c6 dfblob:68afc07 dfblob:e9fae31 dfblob:195fa94
dfblob:91443b3 dfblob:c22209d dfblob:61151f3 dfblob:9330541 )
 OR (
bs:"[Qemu-devel] [PULL 04/14] add 'release-ram' migrate capability" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170213175033.7314-5-dgilbert@redhat.com \
    --to=dgilbert@redhat.com \
    --cc=amit@kernel.org \
    --cc=ashijeetacharya@gmail.com \
    --cc=pbutsykin@virtuozzo.com \
    --cc=qemu-devel@nongnu.org \
    --cc=quintela@redhat.com \
    --cc=zhang.zhanghailiang@huawei.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).