qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Avihai Horon <avihaih@nvidia.com>
To: <qemu-devel@nongnu.org>
Cc: "Alex Williamson" <alex.williamson@redhat.com>,
	"Cédric Le Goater" <clg@redhat.com>,
	"Juan Quintela" <quintela@redhat.com>,
	"Peter Xu" <peterx@redhat.com>,
	"Leonardo Bras" <leobras@redhat.com>,
	"Eric Blake" <eblake@redhat.com>,
	"Markus Armbruster" <armbru@redhat.com>,
	"Thomas Huth" <thuth@redhat.com>,
	"Laurent Vivier" <lvivier@redhat.com>,
	"Paolo Bonzini" <pbonzini@redhat.com>,
	"Yishai Hadas" <yishaih@nvidia.com>,
	"Jason Gunthorpe" <jgg@nvidia.com>,
	"Maor Gottlieb" <maorg@nvidia.com>,
	"Avihai Horon" <avihaih@nvidia.com>,
	"Kirti Wankhede" <kwankhede@nvidia.com>,
	"Tarun Gupta" <targupta@nvidia.com>,
	"Joao Martins" <joao.m.martins@oracle.com>
Subject: [PATCH 8/8] vfio/migration: Add support for precopy initial data capability
Date: Mon, 1 May 2023 17:01:41 +0300	[thread overview]
Message-ID: <20230501140141.11743-9-avihaih@nvidia.com> (raw)
In-Reply-To: <20230501140141.11743-1-avihaih@nvidia.com>

Loading of a VFIO device's data can take a substantial amount of time as
the device may need to allocate resources, prepare internal data
structures, etc. This can increase migration downtime, especially for
VFIO devices with a lot of resources.

To solve this, the VFIO migration uAPI defines "initial bytes" as part
of its precopy data stream. Initial bytes can be used in various ways to
improve VFIO migration performance. For example, they can be used to
transfer device metadata to pre-allocate resources in the destination.
However, for this to work we need to make sure that all initial bytes
are sent and loaded in the destination before the source VM is stopped.

Use the migration precopy initial data capability to make sure a VFIO
device's initial bytes are sent and loaded in the destination before the
source stops the VM and attempts to complete the migration.
This can significantly reduce migration downtime.

Signed-off-by: Avihai Horon <avihaih@nvidia.com>
---
 include/hw/vfio/vfio-common.h |  3 +++
 hw/vfio/migration.c           | 48 ++++++++++++++++++++++++++++++++++-
 2 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index fa42955d4c..dd3b052682 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -69,6 +69,9 @@ typedef struct VFIOMigration {
     uint64_t precopy_init_size;
     uint64_t precopy_dirty_size;
     uint64_t mig_flags;
+    bool initial_data_active;
+    bool initial_data_sent;
+    bool initial_data_loaded;
 } VFIOMigration;
 
 typedef struct VFIOAddressSpace {
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 980be1f614..23f4f1f8a5 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -45,6 +45,7 @@
 #define VFIO_MIG_FLAG_DEV_CONFIG_STATE  (0xffffffffef100002ULL)
 #define VFIO_MIG_FLAG_DEV_SETUP_STATE   (0xffffffffef100003ULL)
 #define VFIO_MIG_FLAG_DEV_DATA_STATE    (0xffffffffef100004ULL)
+#define VFIO_MIG_FLAG_DEV_INIT_DATA_SENT (0xffffffffef100005ULL)
 
 /*
  * This is an arbitrary size based on migration of mlx5 devices, where typically
@@ -372,6 +373,8 @@ static void vfio_save_cleanup(void *opaque)
 
     g_free(migration->data_buffer);
     migration->data_buffer = NULL;
+    migration->initial_data_sent = false;
+    migration->initial_data_active = false;
     vfio_migration_cleanup(vbasedev);
     trace_vfio_save_cleanup(vbasedev->name);
 }
@@ -447,10 +450,17 @@ static int vfio_save_iterate(QEMUFile *f, void *opaque)
     if (data_size < 0) {
         return data_size;
     }
-    qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
 
     vfio_update_estimated_pending_data(migration, data_size);
 
+    if (migration->initial_data_active && !migration->precopy_init_size &&
+        !migration->initial_data_sent) {
+        qemu_put_be64(f, VFIO_MIG_FLAG_DEV_INIT_DATA_SENT);
+        migration->initial_data_sent = true;
+    } else {
+        qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
+    }
+
     trace_vfio_save_iterate(vbasedev->name);
 
     /*
@@ -568,6 +578,12 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id)
             }
             break;
         }
+        case VFIO_MIG_FLAG_DEV_INIT_DATA_SENT:
+        {
+            vbasedev->migration->initial_data_loaded = true;
+
+            return 0;
+        }
         default:
             error_report("%s: Unknown tag 0x%"PRIx64, vbasedev->name, data);
             return -EINVAL;
@@ -582,6 +598,33 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id)
     return ret;
 }
 
+static bool vfio_initial_data_advise(void *opaque)
+{
+    VFIODevice *vbasedev = opaque;
+    VFIOMigration *migration = vbasedev->migration;
+
+    migration->initial_data_active =
+        migration->mig_flags & VFIO_MIGRATION_PRE_COPY;
+
+    return migration->initial_data_active;
+}
+
+static bool vfio_is_initial_data_active(void *opaque)
+{
+    VFIODevice *vbasedev = opaque;
+    VFIOMigration *migration = vbasedev->migration;
+
+    return migration->initial_data_active;
+}
+
+static bool vfio_initial_data_loaded(void *opaque)
+{
+    VFIODevice *vbasedev = opaque;
+    VFIOMigration *migration = vbasedev->migration;
+
+    return migration->initial_data_loaded;
+}
+
 static const SaveVMHandlers savevm_vfio_handlers = {
     .save_setup = vfio_save_setup,
     .save_cleanup = vfio_save_cleanup,
@@ -594,6 +637,9 @@ static const SaveVMHandlers savevm_vfio_handlers = {
     .load_setup = vfio_load_setup,
     .load_cleanup = vfio_load_cleanup,
     .load_state = vfio_load_state,
+    .initial_data_advise = vfio_initial_data_advise,
+    .is_initial_data_active = vfio_is_initial_data_active,
+    .initial_data_loaded = vfio_initial_data_loaded,
 };
 
 /* ---------------------------------------------------------------------- */
-- 
2.26.3



  parent reply	other threads:[~2023-05-01 14:03 UTC|newest]

Thread overview: 42+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-05-01 14:01 [PATCH 0/8] migration: Add precopy initial data capability and VFIO precopy support Avihai Horon
2023-05-01 14:01 ` [PATCH 1/8] migration: Add precopy initial data capability Avihai Horon
2023-05-10  8:24   ` Juan Quintela
2023-05-17  9:17   ` Markus Armbruster
2023-05-17 10:16     ` Avihai Horon
2023-05-17 12:21       ` Markus Armbruster
2023-05-17 13:23         ` Avihai Horon
2023-05-01 14:01 ` [PATCH 2/8] migration: Add precopy initial data handshake Avihai Horon
2023-05-02 22:54   ` Peter Xu
2023-05-03 15:31     ` Avihai Horon
2023-05-10  8:40   ` Juan Quintela
2023-05-10 15:32     ` Avihai Horon
2023-05-14 16:42   ` Cédric Le Goater
2023-05-15  7:56     ` Avihai Horon
2023-05-01 14:01 ` [PATCH 3/8] migration: Add precopy initial data loaded ACK functionality Avihai Horon
2023-05-02 22:56   ` Peter Xu
2023-05-03 15:36     ` Avihai Horon
2023-05-10  8:54   ` Juan Quintela
2023-05-10 15:52     ` Avihai Horon
2023-05-10 15:59       ` Juan Quintela
2023-05-01 14:01 ` [PATCH 4/8] migration: Enable precopy initial data capability Avihai Horon
2023-05-10  8:55   ` Juan Quintela
2023-05-01 14:01 ` [PATCH 5/8] tests: Add migration precopy initial data capability test Avihai Horon
2023-05-10  8:55   ` Juan Quintela
2023-05-01 14:01 ` [PATCH 6/8] vfio/migration: Refactor vfio_save_block() to return saved data size Avihai Horon
2023-05-10  9:00   ` Juan Quintela
2023-05-01 14:01 ` [PATCH 7/8] vfio/migration: Add VFIO migration pre-copy support Avihai Horon
2023-05-01 14:01 ` Avihai Horon [this message]
2023-05-02 22:49 ` [PATCH 0/8] migration: Add precopy initial data capability and VFIO precopy support Peter Xu
2023-05-03 15:22   ` Avihai Horon
2023-05-03 15:49     ` Peter Xu
2023-05-04 10:18       ` Avihai Horon
2023-05-04 15:50         ` Peter Xu
2023-05-07 12:54           ` Avihai Horon
2023-05-08  0:49             ` Peter Xu
2023-05-08 11:11               ` Avihai Horon
2023-05-10  9:12     ` Juan Quintela
2023-05-10 16:01       ` Avihai Horon
2023-05-10 16:41         ` Juan Quintela
2023-05-11 11:31           ` Avihai Horon
2023-05-11 13:09             ` Juan Quintela
2023-05-11 15:08               ` Avihai Horon

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230501140141.11743-9-avihaih@nvidia.com \
    --to=avihaih@nvidia.com \
    --cc=alex.williamson@redhat.com \
    --cc=armbru@redhat.com \
    --cc=clg@redhat.com \
    --cc=eblake@redhat.com \
    --cc=jgg@nvidia.com \
    --cc=joao.m.martins@oracle.com \
    --cc=kwankhede@nvidia.com \
    --cc=leobras@redhat.com \
    --cc=lvivier@redhat.com \
    --cc=maorg@nvidia.com \
    --cc=pbonzini@redhat.com \
    --cc=peterx@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=quintela@redhat.com \
    --cc=targupta@nvidia.com \
    --cc=thuth@redhat.com \
    --cc=yishaih@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).