qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: "Dr. David Alan Gilbert (git)" <dgilbert@redhat.com>
To: qemu-devel@nongnu.org
Cc: aarcange@redhat.com, yamahata@private.email.ne.jp,
	quintela@redhat.com, liang.z.li@intel.com, luis@cs.umu.se,
	bharata@linux.vnet.ibm.com, amit.shah@redhat.com,
	pbonzini@redhat.com, david@gibson.dropbear.id.au
Subject: [Qemu-devel] [PATCH v9 50/56] Postcopy; Handle userfault requests
Date: Thu,  5 Nov 2015 18:11:17 +0000	[thread overview]
Message-ID: <1446747083-18205-51-git-send-email-dgilbert@redhat.com> (raw)
In-Reply-To: <1446747083-18205-1-git-send-email-dgilbert@redhat.com>

From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>

userfaultfd is a Linux syscall that gives an fd that receives a stream
of notifications of accesses to pages registered with it and allows
the program to acknowledge those stalls and tell the accessing
thread to carry on.

We convert the requests from the kernel into messages back to the
source asking for the pages.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Amit Shah <amit.shah@redhat.com>
---
 include/migration/migration.h |   3 +
 migration/postcopy-ram.c      | 155 +++++++++++++++++++++++++++++++++++++++---
 trace-events                  |   9 +++
 3 files changed, 158 insertions(+), 9 deletions(-)

diff --git a/include/migration/migration.h b/include/migration/migration.h
index a48471e..329d535 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -89,11 +89,14 @@ struct MigrationIncomingState {
      */
     QemuEvent main_thread_load_event;
 
+    bool           have_fault_thread;
     QemuThread     fault_thread;
     QemuSemaphore  fault_thread_sem;
 
     /* For the kernel to send us notifications */
     int       userfault_fd;
+    /* To tell the fault_thread to quit */
+    int       userfault_quit_fd;
     QEMUFile *to_src_file;
     QemuMutex rp_mutex;    /* We send replies from multiple threads */
     void     *postcopy_tmp_page;
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 9627093..98db965 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -51,6 +51,8 @@ struct PostcopyDiscardState {
  */
 #if defined(__linux__)
 
+#include <poll.h>
+#include <sys/eventfd.h>
 #include <sys/mman.h>
 #include <sys/ioctl.h>
 #include <sys/syscall.h>
@@ -267,15 +269,41 @@ int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages)
  */
 int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
 {
-    /* TODO: Join the fault thread once we're sure it will exit */
-    if (qemu_ram_foreach_block(cleanup_range, mis)) {
-        return -1;
+    trace_postcopy_ram_incoming_cleanup_entry();
+
+    if (mis->have_fault_thread) {
+        uint64_t tmp64;
+
+        if (qemu_ram_foreach_block(cleanup_range, mis)) {
+            return -1;
+        }
+        /*
+         * Tell the fault_thread to exit, it's an eventfd that should
+         * currently be at 0, we're going to increment it to 1
+         */
+        tmp64 = 1;
+        if (write(mis->userfault_quit_fd, &tmp64, 8) == 8) {
+            trace_postcopy_ram_incoming_cleanup_join();
+            qemu_thread_join(&mis->fault_thread);
+        } else {
+            /* Not much we can do here, but may as well report it */
+            error_report("%s: incrementing userfault_quit_fd: %s", __func__,
+                         strerror(errno));
+        }
+        trace_postcopy_ram_incoming_cleanup_closeuf();
+        close(mis->userfault_fd);
+        close(mis->userfault_quit_fd);
+        mis->have_fault_thread = false;
     }
 
+    postcopy_state_set(POSTCOPY_INCOMING_END);
+    migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0);
+
     if (mis->postcopy_tmp_page) {
         munmap(mis->postcopy_tmp_page, getpagesize());
         mis->postcopy_tmp_page = NULL;
     }
+    trace_postcopy_ram_incoming_cleanup_exit();
     return 0;
 }
 
@@ -314,31 +342,140 @@ static int ram_block_enable_notify(const char *block_name, void *host_addr,
 static void *postcopy_ram_fault_thread(void *opaque)
 {
     MigrationIncomingState *mis = opaque;
+    struct uffd_msg msg;
+    int ret;
+    size_t hostpagesize = getpagesize();
+    RAMBlock *rb = NULL;
+    RAMBlock *last_rb = NULL; /* last RAMBlock we sent part of */
 
-    fprintf(stderr, "postcopy_ram_fault_thread\n");
-    /* TODO: In later patch */
+    trace_postcopy_ram_fault_thread_entry();
     qemu_sem_post(&mis->fault_thread_sem);
-    while (1) {
-        /* TODO: In later patch */
-    }
 
+    while (true) {
+        ram_addr_t rb_offset;
+        ram_addr_t in_raspace;
+        struct pollfd pfd[2];
+
+        /*
+         * We're mainly waiting for the kernel to give us a faulting HVA,
+         * however we can be told to quit via userfault_quit_fd which is
+         * an eventfd
+         */
+        pfd[0].fd = mis->userfault_fd;
+        pfd[0].events = POLLIN;
+        pfd[0].revents = 0;
+        pfd[1].fd = mis->userfault_quit_fd;
+        pfd[1].events = POLLIN; /* Waiting for eventfd to go positive */
+        pfd[1].revents = 0;
+
+        if (poll(pfd, 2, -1 /* Wait forever */) == -1) {
+            error_report("%s: userfault poll: %s", __func__, strerror(errno));
+            break;
+        }
+
+        if (pfd[1].revents) {
+            trace_postcopy_ram_fault_thread_quit();
+            break;
+        }
+
+        ret = read(mis->userfault_fd, &msg, sizeof(msg));
+        if (ret != sizeof(msg)) {
+            if (errno == EAGAIN) {
+                /*
+                 * if a wake up happens on the other thread just after
+                 * the poll, there is nothing to read.
+                 */
+                continue;
+            }
+            if (ret < 0) {
+                error_report("%s: Failed to read full userfault message: %s",
+                             __func__, strerror(errno));
+                break;
+            } else {
+                error_report("%s: Read %d bytes from userfaultfd expected %zd",
+                             __func__, ret, sizeof(msg));
+                break; /* Lost alignment, don't know what we'd read next */
+            }
+        }
+        if (msg.event != UFFD_EVENT_PAGEFAULT) {
+            error_report("%s: Read unexpected event %ud from userfaultfd",
+                         __func__, msg.event);
+            continue; /* It's not a page fault, shouldn't happen */
+        }
+
+        rb = qemu_ram_block_from_host(
+                 (void *)(uintptr_t)msg.arg.pagefault.address,
+                 true, &in_raspace, &rb_offset);
+        if (!rb) {
+            error_report("postcopy_ram_fault_thread: Fault outside guest: %"
+                         PRIx64, (uint64_t)msg.arg.pagefault.address);
+            break;
+        }
+
+        rb_offset &= ~(hostpagesize - 1);
+        trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address,
+                                                qemu_ram_get_idstr(rb),
+                                                rb_offset);
+
+        /*
+         * Send the request to the source - we want to request one
+         * of our host page sizes (which is >= TPS)
+         */
+        if (rb != last_rb) {
+            last_rb = rb;
+            migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb),
+                                     rb_offset, hostpagesize);
+        } else {
+            /* Save some space */
+            migrate_send_rp_req_pages(mis, NULL,
+                                     rb_offset, hostpagesize);
+        }
+    }
+    trace_postcopy_ram_fault_thread_exit();
     return NULL;
 }
 
 int postcopy_ram_enable_notify(MigrationIncomingState *mis)
 {
-    /* Create the fault handler thread and wait for it to be ready */
+    /* Open the fd for the kernel to give us userfaults */
+    mis->userfault_fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
+    if (mis->userfault_fd == -1) {
+        error_report("%s: Failed to open userfault fd: %s", __func__,
+                     strerror(errno));
+        return -1;
+    }
+
+    /*
+     * Although the host check already tested the API, we need to
+     * do the check again as an ABI handshake on the new fd.
+     */
+    if (!ufd_version_check(mis->userfault_fd)) {
+        return -1;
+    }
+
+    /* Now an eventfd we use to tell the fault-thread to quit */
+    mis->userfault_quit_fd = eventfd(0, EFD_CLOEXEC);
+    if (mis->userfault_quit_fd == -1) {
+        error_report("%s: Opening userfault_quit_fd: %s", __func__,
+                     strerror(errno));
+        close(mis->userfault_fd);
+        return -1;
+    }
+
     qemu_sem_init(&mis->fault_thread_sem, 0);
     qemu_thread_create(&mis->fault_thread, "postcopy/fault",
                        postcopy_ram_fault_thread, mis, QEMU_THREAD_JOINABLE);
     qemu_sem_wait(&mis->fault_thread_sem);
     qemu_sem_destroy(&mis->fault_thread_sem);
+    mis->have_fault_thread = true;
 
     /* Mark so that we get notified of accesses to unwritten areas */
     if (qemu_ram_foreach_block(ram_block_enable_notify, mis)) {
         return -1;
     }
 
+    trace_postcopy_ram_enable_notify();
+
     return 0;
 }
 
diff --git a/trace-events b/trace-events
index ad7afcb..60be299 100644
--- a/trace-events
+++ b/trace-events
@@ -1554,6 +1554,15 @@ postcopy_cleanup_range(const char *ramblock, void *host_addr, size_t offset, siz
 postcopy_init_range(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx"
 postcopy_place_page(void *host_addr) "host=%p"
 postcopy_place_page_zero(void *host_addr) "host=%p"
+postcopy_ram_enable_notify(void) ""
+postcopy_ram_fault_thread_entry(void) ""
+postcopy_ram_fault_thread_exit(void) ""
+postcopy_ram_fault_thread_quit(void) ""
+postcopy_ram_fault_thread_request(uint64_t hostaddr, const char *ramblock, size_t offset) "Request for HVA=%" PRIx64 " rb=%s offset=%zx"
+postcopy_ram_incoming_cleanup_closeuf(void) ""
+postcopy_ram_incoming_cleanup_entry(void) ""
+postcopy_ram_incoming_cleanup_exit(void) ""
+postcopy_ram_incoming_cleanup_join(void) ""
 
 # kvm-all.c
 kvm_ioctl(int type, void *arg) "type 0x%x, arg %p"
-- 
2.5.0

  parent reply	other threads:[~2015-11-05 18:13 UTC|newest]

Thread overview: 92+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-11-05 18:10 [Qemu-devel] [PATCH v9 00/56] Postcopy implementation Dr. David Alan Gilbert (git)
2015-11-05 18:10 ` [Qemu-devel] [PATCH v9 01/56] Add postcopy documentation Dr. David Alan Gilbert (git)
2015-11-05 18:10 ` [Qemu-devel] [PATCH v9 02/56] Provide runtime Target page information Dr. David Alan Gilbert (git)
2015-11-05 18:10 ` [Qemu-devel] [PATCH v9 03/56] Move configuration section writing Dr. David Alan Gilbert (git)
2015-11-05 18:10 ` [Qemu-devel] [PATCH v9 04/56] Move page_size_init earlier Dr. David Alan Gilbert (git)
2015-11-09  9:21   ` Juan Quintela
2015-11-05 18:10 ` [Qemu-devel] [PATCH v9 05/56] qemu_ram_block_from_host Dr. David Alan Gilbert (git)
2015-11-05 18:10 ` [Qemu-devel] [PATCH v9 06/56] qemu_ram_block_by_name Dr. David Alan Gilbert (git)
2015-11-09  9:24   ` Juan Quintela
2015-11-05 18:10 ` [Qemu-devel] [PATCH v9 07/56] Rename mis->file to from_src_file Dr. David Alan Gilbert (git)
2015-11-05 18:10 ` [Qemu-devel] [PATCH v9 08/56] Add qemu_get_buffer_in_place to avoid copies some of the time Dr. David Alan Gilbert (git)
2015-11-09  9:25   ` Juan Quintela
2015-11-05 18:10 ` [Qemu-devel] [PATCH v9 09/56] Add wrapper for setting blocking status on a QEMUFile Dr. David Alan Gilbert (git)
2015-11-05 18:10 ` [Qemu-devel] [PATCH v9 10/56] Add QEMU_MADV_NOHUGEPAGE Dr. David Alan Gilbert (git)
2015-11-05 18:10 ` [Qemu-devel] [PATCH v9 11/56] ram_debug_dump_bitmap: Dump a migration bitmap as text Dr. David Alan Gilbert (git)
2015-11-05 18:10 ` [Qemu-devel] [PATCH v9 12/56] ram_load: Factor out host_from_stream_offset call and check Dr. David Alan Gilbert (git)
2015-11-09  9:29   ` Juan Quintela
2015-11-05 18:10 ` [Qemu-devel] [PATCH v9 13/56] migrate_init: Call from savevm Dr. David Alan Gilbert (git)
2015-11-05 18:10 ` [Qemu-devel] [PATCH v9 14/56] Rename save_live_complete to save_live_complete_precopy Dr. David Alan Gilbert (git)
2015-11-05 18:10 ` [Qemu-devel] [PATCH v9 15/56] Add Linux userfaultfd.h header Dr. David Alan Gilbert (git)
2015-11-09  9:31   ` Juan Quintela
2015-11-05 18:10 ` [Qemu-devel] [PATCH v9 16/56] Return path: Open a return path on QEMUFile for sockets Dr. David Alan Gilbert (git)
2015-11-05 18:10 ` [Qemu-devel] [PATCH v9 17/56] Return path: socket_writev_buffer: Block even on non-blocking fd's Dr. David Alan Gilbert (git)
2015-11-05 18:10 ` [Qemu-devel] [PATCH v9 18/56] Migration commands Dr. David Alan Gilbert (git)
2015-11-05 18:10 ` [Qemu-devel] [PATCH v9 19/56] Return path: Control commands Dr. David Alan Gilbert (git)
2015-11-05 18:10 ` [Qemu-devel] [PATCH v9 20/56] Return path: Send responses from destination to source Dr. David Alan Gilbert (git)
2015-11-09 10:54   ` Juan Quintela
2015-11-05 18:10 ` [Qemu-devel] [PATCH v9 21/56] migration_is_setup_or_active Dr. David Alan Gilbert (git)
2015-11-09 10:55   ` Juan Quintela
2015-11-05 18:10 ` [Qemu-devel] [PATCH v9 22/56] Return path: Source handling of return path Dr. David Alan Gilbert (git)
2015-11-05 18:10 ` [Qemu-devel] [PATCH v9 23/56] Rework loadvm path for subloops Dr. David Alan Gilbert (git)
2015-11-05 18:10 ` [Qemu-devel] [PATCH v9 24/56] Add migration-capability boolean for postcopy-ram Dr. David Alan Gilbert (git)
2015-11-05 18:10 ` [Qemu-devel] [PATCH v9 25/56] Add wrappers and handlers for sending/receiving the postcopy-ram migration messages Dr. David Alan Gilbert (git)
2015-11-05 18:10 ` [Qemu-devel] [PATCH v9 26/56] MIG_CMD_PACKAGED: Send a packaged chunk of migration stream Dr. David Alan Gilbert (git)
2015-11-05 18:10 ` [Qemu-devel] [PATCH v9 27/56] Modify save_live_pending for postcopy Dr. David Alan Gilbert (git)
2015-11-05 18:10 ` [Qemu-devel] [PATCH v9 28/56] postcopy: OS support test Dr. David Alan Gilbert (git)
2015-11-05 18:10 ` [Qemu-devel] [PATCH v9 29/56] migrate_start_postcopy: Command to trigger transition to postcopy Dr. David Alan Gilbert (git)
2015-11-05 18:10 ` [Qemu-devel] [PATCH v9 30/56] migration_completion: Take current state Dr. David Alan Gilbert (git)
2015-11-09 10:57   ` Juan Quintela
2015-11-05 18:10 ` [Qemu-devel] [PATCH v9 31/56] MIGRATION_STATUS_POSTCOPY_ACTIVE: Add new migration state Dr. David Alan Gilbert (git)
2015-11-05 18:10 ` [Qemu-devel] [PATCH v9 32/56] Avoid sending vmdescription during postcopy Dr. David Alan Gilbert (git)
2015-11-05 18:11 ` [Qemu-devel] [PATCH v9 33/56] Add qemu_savevm_state_complete_postcopy Dr. David Alan Gilbert (git)
2015-11-05 18:11 ` [Qemu-devel] [PATCH v9 34/56] Postcopy: Maintain unsentmap Dr. David Alan Gilbert (git)
2015-11-09 11:01   ` Juan Quintela
2015-11-05 18:11 ` [Qemu-devel] [PATCH v9 35/56] Postcopy: Calculate discard Dr. David Alan Gilbert (git)
2015-11-09 11:08   ` Juan Quintela
2015-11-09 18:27     ` Dr. David Alan Gilbert
2015-11-05 18:11 ` [Qemu-devel] [PATCH v9 36/56] postcopy: Incoming initialisation Dr. David Alan Gilbert (git)
2015-11-09 11:11   ` Juan Quintela
2015-11-05 18:11 ` [Qemu-devel] [PATCH v9 37/56] postcopy: ram_enable_notify to switch on userfault Dr. David Alan Gilbert (git)
2015-11-05 18:11 ` [Qemu-devel] [PATCH v9 38/56] Postcopy: Postcopy startup in migration thread Dr. David Alan Gilbert (git)
2015-11-05 18:11 ` [Qemu-devel] [PATCH v9 39/56] Postcopy: End of iteration Dr. David Alan Gilbert (git)
2015-11-05 18:11 ` [Qemu-devel] [PATCH v9 40/56] Page request: Add MIG_RP_MSG_REQ_PAGES reverse command Dr. David Alan Gilbert (git)
2015-11-05 18:11 ` [Qemu-devel] [PATCH v9 41/56] Page request: Process incoming page request Dr. David Alan Gilbert (git)
2015-11-05 18:11 ` [Qemu-devel] [PATCH v9 42/56] Page request: Consume pages off the post-copy queue Dr. David Alan Gilbert (git)
2015-11-09 11:18   ` Juan Quintela
2015-11-05 18:11 ` [Qemu-devel] [PATCH v9 43/56] postcopy_ram.c: place_page and helpers Dr. David Alan Gilbert (git)
2015-11-05 18:11 ` [Qemu-devel] [PATCH v9 44/56] Postcopy: Use helpers to map pages during migration Dr. David Alan Gilbert (git)
2015-11-09 11:19   ` Juan Quintela
2015-11-05 18:11 ` [Qemu-devel] [PATCH v9 45/56] postcopy: Check order of received target pages Dr. David Alan Gilbert (git)
2015-11-05 18:11 ` [Qemu-devel] [PATCH v9 46/56] Don't sync dirty bitmaps in postcopy Dr. David Alan Gilbert (git)
2015-11-05 18:11 ` [Qemu-devel] [PATCH v9 47/56] Don't iterate on precopy-only devices during postcopy Dr. David Alan Gilbert (git)
2015-11-05 18:11 ` [Qemu-devel] [PATCH v9 48/56] Host page!=target page: Cleanup bitmaps Dr. David Alan Gilbert (git)
2015-11-09 11:22   ` Juan Quintela
2015-11-05 18:11 ` [Qemu-devel] [PATCH v9 49/56] Round up RAMBlock sizes to host page sizes Dr. David Alan Gilbert (git)
2015-11-05 18:11 ` Dr. David Alan Gilbert (git) [this message]
2015-11-05 18:11 ` [Qemu-devel] [PATCH v9 51/56] Start up a postcopy/listener thread ready for incoming page data Dr. David Alan Gilbert (git)
2015-11-05 18:11 ` [Qemu-devel] [PATCH v9 52/56] postcopy: Wire up loadvm_postcopy_handle_ commands Dr. David Alan Gilbert (git)
2015-11-05 18:11 ` [Qemu-devel] [PATCH v9 53/56] Postcopy: Mark nohugepage before discard Dr. David Alan Gilbert (git)
2015-11-05 18:11 ` [Qemu-devel] [PATCH v9 54/56] End of migration for postcopy Dr. David Alan Gilbert (git)
2015-11-05 18:11 ` [Qemu-devel] [PATCH v9 55/56] Disable mlock around incoming postcopy Dr. David Alan Gilbert (git)
2015-11-05 18:11 ` [Qemu-devel] [PATCH v9 56/56] Inhibit ballooning during postcopy Dr. David Alan Gilbert (git)
2015-11-06  3:48 ` [Qemu-devel] [PATCH v9 00/56] Postcopy implementation Bharata B Rao
2015-11-06  9:09   ` Dr. David Alan Gilbert
2015-11-06 11:06     ` Bharata B Rao
2015-11-06 12:22       ` Dr. David Alan Gilbert
2015-11-06 13:43         ` Dr. David Alan Gilbert
2015-11-06 15:33           ` Bharata B Rao
2015-11-06 15:48             ` Dr. David Alan Gilbert
2015-11-09  3:21               ` Bharata B Rao
2015-11-09  9:08                 ` Dr. David Alan Gilbert
2015-11-09 10:28                   ` Bharata B Rao
2015-11-09 11:03                     ` Dr. David Alan Gilbert
2015-11-09 12:39                       ` Bharata B Rao
2015-11-09 12:52                         ` Dr. David Alan Gilbert
2015-11-09  4:13         ` David Gibson
2015-11-09  9:05           ` Paolo Bonzini
2015-11-09 16:02 ` Juan Quintela
2015-11-09 16:17   ` Dr. David Alan Gilbert
2016-01-27 11:54 ` Vladimir Sementsov-Ogievskiy
2016-01-27 12:42   ` Dr. David Alan Gilbert
2016-01-27 14:47     ` Vladimir Sementsov-Ogievskiy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1446747083-18205-51-git-send-email-dgilbert@redhat.com \
    --to=dgilbert@redhat.com \
    --cc=aarcange@redhat.com \
    --cc=amit.shah@redhat.com \
    --cc=bharata@linux.vnet.ibm.com \
    --cc=david@gibson.dropbear.id.au \
    --cc=liang.z.li@intel.com \
    --cc=luis@cs.umu.se \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=quintela@redhat.com \
    --cc=yamahata@private.email.ne.jp \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).