All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Xu <peterx@redhat.com>
To: qemu-devel@nongnu.org
Cc: "Maciej S . Szmigiero" <mail@maciej.szmigiero.name>,
	"Daniel P . Berrangé" <berrange@redhat.com>,
	"Zhiyi Guo" <zhguo@redhat.com>,
	"Juraj Marcin" <jmarcin@redhat.com>,
	"Peter Xu" <peterx@redhat.com>,
	"Prasad Pandit" <ppandit@redhat.com>,
	"Avihai Horon" <avihaih@nvidia.com>,
	"Kirti Wankhede" <kwankhede@nvidia.com>,
	"Cédric Le Goater" <clg@redhat.com>,
	"Fabiano Rosas" <farosas@suse.de>,
	"Joao Martins" <joao.m.martins@oracle.com>,
	"Markus Armbruster" <armbru@redhat.com>,
	"Alex Williamson" <alex@shazbot.org>
Subject: [PATCH 03/14] vfio/migration: Cache stop size in VFIOMigration
Date: Wed,  8 Apr 2026 12:55:47 -0400	[thread overview]
Message-ID: <20260408165559.157108-4-peterx@redhat.com> (raw)
In-Reply-To: <20260408165559.157108-1-peterx@redhat.com>

Add a field to cache the stop-copy size.  Note that the default stop size
used in vfio_save_setup() when the query fails changes from
VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE to VFIO_MIG_STOP_COPY_SIZE, but that does
not matter because the value is immediately clamped with MIN() against
VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE.

Document that all three sizes we read from VFIO's uAPI (the precopy
initial, precopy dirty and stop-copy sizes) are estimates, so QEMU must
always be prepared for the actual data size to differ from them.

Signed-off-by: Peter Xu <peterx@redhat.com>
---
 hw/vfio/vfio-migration-internal.h |  8 +++++
 hw/vfio/migration.c               | 50 ++++++++++++++++++-------------
 2 files changed, 38 insertions(+), 20 deletions(-)

diff --git a/hw/vfio/vfio-migration-internal.h b/hw/vfio/vfio-migration-internal.h
index 814fbd9eba..a15fc74703 100644
--- a/hw/vfio/vfio-migration-internal.h
+++ b/hw/vfio/vfio-migration-internal.h
@@ -45,8 +45,16 @@ typedef struct VFIOMigration {
     void *data_buffer;
     size_t data_buffer_size;
     uint64_t mig_flags;
+    /*
+     * NOTE: all three cached sizes are reported by VFIO's uAPI, which
+     * defines them as estimates only.  QEMU should not trust these values
+     * but only use them for best-effort estimates.  Always be prepared
+     * for the actual amount of data to be larger or smaller than these
+     * sizes while read()ing from the VFIO fds.
+     */
     uint64_t precopy_init_size;
     uint64_t precopy_dirty_size;
+    uint64_t stopcopy_size;
     bool multifd_transfer;
     VFIOMultifd *multifd;
     bool initial_data_sent;
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 83327b6573..5d5fca09bd 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -41,6 +41,12 @@
  */
 #define VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE (1 * MiB)
 
+/*
+ * Migration size of VFIO devices can be as little as a few KBs or as big as
+ * many GBs. This value should be big enough to cover the worst case.
+ */
+#define VFIO_MIG_STOP_COPY_SIZE (100 * GiB)
+
 static unsigned long bytes_transferred;
 
 static const char *mig_state_to_str(enum vfio_device_mig_state state)
@@ -314,8 +320,7 @@ static void vfio_migration_cleanup(VFIODevice *vbasedev)
     migration->data_fd = -1;
 }
 
-static int vfio_query_stop_copy_size(VFIODevice *vbasedev,
-                                     uint64_t *stop_copy_size)
+static int vfio_query_stop_copy_size(VFIODevice *vbasedev)
 {
     uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) +
                               sizeof(struct vfio_device_feature_mig_data_size),
@@ -323,16 +328,22 @@ static int vfio_query_stop_copy_size(VFIODevice *vbasedev,
     struct vfio_device_feature *feature = (struct vfio_device_feature *)buf;
     struct vfio_device_feature_mig_data_size *mig_data_size =
         (struct vfio_device_feature_mig_data_size *)feature->data;
+    VFIOMigration *migration = vbasedev->migration;
 
     feature->argsz = sizeof(buf);
     feature->flags =
         VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_MIG_DATA_SIZE;
 
     if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) {
+        /*
+         * If getting pending migration size fails, VFIO_MIG_STOP_COPY_SIZE
+         * is reported so downtime limit won't be violated.
+         */
+        migration->stopcopy_size = VFIO_MIG_STOP_COPY_SIZE;
         return -errno;
     }
 
-    *stop_copy_size = mig_data_size->stop_copy_length;
+    migration->stopcopy_size = mig_data_size->stop_copy_length;
 
     return 0;
 }
@@ -409,6 +420,16 @@ static void vfio_update_estimated_pending_data(VFIOMigration *migration,
         return;
     }
 
+    /*
+     * The total size remaining requires separate accounting.  The counter
+     * is only an estimate, so we may have read() more than was reported.
+     */
+    if (migration->stopcopy_size > data_size) {
+        migration->stopcopy_size -= data_size;
+    } else {
+        migration->stopcopy_size = 0;
+    }
+
     if (migration->precopy_init_size) {
         uint64_t init_size = MIN(migration->precopy_init_size, data_size);
 
@@ -463,7 +484,6 @@ static int vfio_save_setup(QEMUFile *f, void *opaque, Error **errp)
 {
     VFIODevice *vbasedev = opaque;
     VFIOMigration *migration = vbasedev->migration;
-    uint64_t stop_copy_size = VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE;
     int ret;
 
     if (!vfio_multifd_setup(vbasedev, false, errp)) {
@@ -472,9 +492,9 @@ static int vfio_save_setup(QEMUFile *f, void *opaque, Error **errp)
 
     qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE);
 
-    vfio_query_stop_copy_size(vbasedev, &stop_copy_size);
+    vfio_query_stop_copy_size(vbasedev);
     migration->data_buffer_size = MIN(VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE,
-                                      stop_copy_size);
+                                      migration->stopcopy_size);
     migration->data_buffer = g_try_malloc0(migration->data_buffer_size);
     if (!migration->data_buffer) {
         error_setg(errp, "%s: Failed to allocate migration data buffer",
@@ -570,32 +590,22 @@ static void vfio_state_pending_estimate(void *opaque, uint64_t *must_precopy,
                                       migration->precopy_dirty_size);
 }
 
-/*
- * Migration size of VFIO devices can be as little as a few KBs or as big as
- * many GBs. This value should be big enough to cover the worst case.
- */
-#define VFIO_MIG_STOP_COPY_SIZE (100 * GiB)
-
 static void vfio_state_pending_exact(void *opaque, uint64_t *must_precopy,
                                      uint64_t *can_postcopy)
 {
     VFIODevice *vbasedev = opaque;
     VFIOMigration *migration = vbasedev->migration;
-    uint64_t stop_copy_size = VFIO_MIG_STOP_COPY_SIZE;
 
-    /*
-     * If getting pending migration size fails, VFIO_MIG_STOP_COPY_SIZE is
-     * reported so downtime limit won't be violated.
-     */
-    vfio_query_stop_copy_size(vbasedev, &stop_copy_size);
-    *must_precopy += stop_copy_size;
+    vfio_query_stop_copy_size(vbasedev);
+    *must_precopy += migration->stopcopy_size;
 
     if (vfio_device_state_is_precopy(vbasedev)) {
         vfio_query_precopy_size(migration);
     }
 
     trace_vfio_state_pending_exact(vbasedev->name, *must_precopy, *can_postcopy,
-                                   stop_copy_size, migration->precopy_init_size,
+                                   migration->stopcopy_size,
+                                   migration->precopy_init_size,
                                    migration->precopy_dirty_size);
 }
 
-- 
2.53.0



  parent reply	other threads:[~2026-04-08 19:39 UTC|newest]

Thread overview: 52+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-08 16:55 [PATCH 00/14] migration/vfio: Fix a few issues on API misuse or statistic reports Peter Xu
2026-04-08 16:55 ` [PATCH 01/14] migration: Fix low possibility downtime violation Peter Xu
2026-04-08 16:55 ` [PATCH 02/14] migration/qapi: Rename MigrationStats to MigrationRAMStats Peter Xu
2026-04-09 17:08   ` Juraj Marcin
2026-04-10 11:10   ` Michal Prívozník
2026-04-15 16:09     ` Peter Xu
2026-04-08 16:55 ` Peter Xu [this message]
2026-04-13  9:52   ` [PATCH 03/14] vfio/migration: Cache stop size in VFIOMigration Avihai Horon
2026-04-08 16:55 ` [PATCH 04/14] migration/treewide: Merge @state_pending_{exact|estimate} APIs Peter Xu
2026-04-09 17:10   ` Juraj Marcin
2026-04-15 16:23     ` Peter Xu
2026-04-16  8:24       ` Juraj Marcin
2026-04-13  9:57   ` Avihai Horon
2026-04-16 14:01     ` Peter Xu
2026-04-16 14:18   ` Jason J. Herne
2026-04-08 16:55 ` [PATCH 05/14] migration: Use the new save_query_pending() API directly Peter Xu
2026-04-13  9:59   ` Avihai Horon
2026-04-08 16:55 ` [PATCH 06/14] migration: Introduce stopcopy_bytes in save_query_pending() Peter Xu
2026-04-09 17:13   ` Juraj Marcin
2026-04-09 17:36   ` Juraj Marcin
2026-04-16 17:20     ` Peter Xu
2026-04-17 10:18       ` Juraj Marcin
2026-04-13 10:34   ` Avihai Horon
2026-04-08 16:55 ` [PATCH 07/14] vfio/migration: Fix incorrect reporting for VFIO pending data Peter Xu
2026-04-13 10:56   ` Avihai Horon
2026-04-08 16:55 ` [PATCH 08/14] migration: Make qemu_savevm_query_pending() available anytime Peter Xu
2026-04-09 17:15   ` Juraj Marcin
2026-04-16 18:06     ` Peter Xu
2026-04-17 10:26       ` Juraj Marcin
2026-04-20 15:56         ` Peter Xu
2026-04-08 16:55 ` [PATCH 09/14] migration: Move iteration counter out of RAM Peter Xu
2026-04-09 22:14   ` Fabiano Rosas
2026-04-16 18:15     ` Peter Xu
2026-04-16 21:15       ` Fabiano Rosas
2026-04-08 16:55 ` [PATCH 10/14] migration: Introduce a helper to return switchover bw estimate Peter Xu
2026-04-08 16:55 ` [PATCH 11/14] migration: Calculate expected downtime on demand Peter Xu
2026-04-09 17:16   ` Juraj Marcin
2026-04-08 16:55 ` [PATCH 12/14] migration: Fix calculation of expected_downtime to take VFIO info Peter Xu
2026-04-09 17:17   ` Juraj Marcin
2026-04-09 22:17   ` Fabiano Rosas
2026-04-16 18:19     ` Peter Xu
2026-04-08 16:55 ` [PATCH 13/14] migration/qapi: Introduce system-wise "remaining" reports Peter Xu
2026-04-09 17:41   ` Juraj Marcin
2026-04-09 21:48   ` Dr. David Alan Gilbert
2026-04-16 18:25     ` Peter Xu
2026-04-09 22:21   ` Fabiano Rosas
2026-04-16 18:26     ` Peter Xu
2026-04-08 16:55 ` [PATCH 14/14] migration/qapi: Update unit for avail-switchover-bandwidth Peter Xu
2026-04-09 17:40   ` Juraj Marcin
2026-04-08 18:37 ` [PATCH 00/14] migration/vfio: Fix a few issues on API misuse or statistic reports Peter Xu
2026-04-13 16:09 ` Cédric Le Goater
2026-04-15 16:06   ` Peter Xu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260408165559.157108-4-peterx@redhat.com \
    --to=peterx@redhat.com \
    --cc=alex@shazbot.org \
    --cc=armbru@redhat.com \
    --cc=avihaih@nvidia.com \
    --cc=berrange@redhat.com \
    --cc=clg@redhat.com \
    --cc=farosas@suse.de \
    --cc=jmarcin@redhat.com \
    --cc=joao.m.martins@oracle.com \
    --cc=kwankhede@nvidia.com \
    --cc=mail@maciej.szmigiero.name \
    --cc=ppandit@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=zhguo@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.