public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
From: Magnus Kulke <magnuskulke@linux.microsoft.com>
To: qemu-devel@nongnu.org
Cc: kvm@vger.kernel.org, "Magnus Kulke" <magnuskulke@microsoft.com>,
	"Wei Liu" <liuwe@microsoft.com>,
	"Michael S. Tsirkin" <mst@redhat.com>,
	"Cédric Le Goater" <clg@redhat.com>,
	"Zhao Liu" <zhao1.liu@intel.com>,
	"Richard Henderson" <richard.henderson@linaro.org>,
	"Paolo Bonzini" <pbonzini@redhat.com>,
	"Wei Liu" <wei.liu@kernel.org>,
	"Magnus Kulke" <magnuskulke@linux.microsoft.com>,
	"Alex Williamson" <alex@shazbot.org>,
	"Marcel Apfelbaum" <marcel.apfelbaum@gmail.com>,
	"Philippe Mathieu-Daudé" <philmd@linaro.org>,
	"Marcelo Tosatti" <mtosatti@redhat.com>
Subject: [PATCH 34/34] accel/mshv: enable dirty page tracking
Date: Fri, 17 Apr 2026 12:56:18 +0200	[thread overview]
Message-ID: <20260417105618.3621-35-magnuskulke@linux.microsoft.com> (raw)
In-Reply-To: <20260417105618.3621-1-magnuskulke@linux.microsoft.com>

This change introduces the functions required to perform dirty page
tracking to speed up migrations. We are using the sync, global_start,
and global_stop hooks.

The sync is implemented in batches.

Before we can disable the dirty page tracking we have to set all dirty bits.

Signed-off-by: Magnus Kulke <magnuskulke@linux.microsoft.com>
---
 accel/mshv/mem.c          | 211 ++++++++++++++++++++++++++++++++++++++
 accel/mshv/mshv-all.c     |   3 +
 include/system/mshv_int.h |   5 +
 3 files changed, 219 insertions(+)

diff --git a/accel/mshv/mem.c b/accel/mshv/mem.c
index e55c38d4db..820f87ef0c 100644
--- a/accel/mshv/mem.c
+++ b/accel/mshv/mem.c
@@ -12,10 +12,13 @@
 
 #include "qemu/osdep.h"
 #include "qemu/error-report.h"
+#include "qapi/error.h"
 #include "linux/mshv.h"
 #include "system/address-spaces.h"
 #include "system/mshv.h"
 #include "system/mshv_int.h"
+#include "hw/hyperv/hvhdk_mini.h"
+#include "system/physmem.h"
 #include "exec/memattrs.h"
 #include <sys/ioctl.h>
 #include "trace.h"
@@ -211,3 +214,211 @@ void mshv_set_phys_mem(MshvMemoryListener *mml, MemoryRegionSection *section,
         abort();
     }
 }
+
+/*
+ * Turn on GPA page access tracking for the partition behind @vm_fd by
+ * setting HV_PARTITION_PROPERTY_GPA_PAGE_ACCESS_TRACKING to 1 through a
+ * HVCALL_SET_PARTITION_PROPERTY hypercall.
+ *
+ * Returns 0 on success, or a negative errno value on failure so that the
+ * caller can hand it to error_setg_errno().
+ */
+static int enable_dirty_page_tracking(int vm_fd)
+{
+    int ret, saved_errno;
+    struct hv_input_set_partition_property in = {0};
+    struct mshv_root_hvcall args = {0};
+
+    in.property_code = HV_PARTITION_PROPERTY_GPA_PAGE_ACCESS_TRACKING;
+    in.property_value = 1;
+
+    args.code = HVCALL_SET_PARTITION_PROPERTY;
+    args.in_sz = sizeof(in);
+    args.in_ptr = (uint64_t)&in;
+
+    ret = mshv_hvcall(vm_fd, &args);
+    if (ret < 0) {
+        /* Capture errno first: reporting the error may overwrite it. */
+        saved_errno = errno;
+        error_report("Failed to enable dirty page tracking: %s",
+                     strerror(saved_errno));
+        /* Never return 0 on failure, even if errno was left unset. */
+        return saved_errno > 0 ? -saved_errno : -EIO;
+    }
+
+    return 0;
+}
+
+/*
+ * Fetch the dirty bitmap for @page_count guest pages starting at @base_pfn
+ * into @bitmap, which is @bitmap_size bytes long. Every request is issued
+ * with MSHV_GPAP_ACCESS_OP_CLEAR, so the dirty bits are cleared by the same
+ * call that reads them. The range is walked in chunks of at most
+ * MSHV_DIRTY_PAGES_BATCH_SIZE pages.
+ *
+ * Returns 0 on success, -1 as soon as one ioctl fails.
+ */
+static int get_dirty_log(int vm_fd, uint64_t base_pfn, uint64_t page_count,
+                         unsigned long *bitmap, size_t bitmap_size)
+{
+    struct mshv_gpap_access_bitmap args = {0};
+    uint64_t done, chunk;
+    unsigned long *dst;
+
+    /* A full batch must cover whole longs so chunks never share a word. */
+    QEMU_BUILD_BUG_ON(MSHV_DIRTY_PAGES_BATCH_SIZE % BITS_PER_LONG != 0);
+    assert(bitmap_size >= ROUND_UP(page_count, BITS_PER_LONG) / 8);
+
+    for (done = 0; done < page_count; done += chunk) {
+        chunk = MIN(MSHV_DIRTY_PAGES_BATCH_SIZE, page_count - done);
+        /* First word of the caller's bitmap that belongs to this chunk. */
+        dst = bitmap + done / BITS_PER_LONG;
+
+        args.access_type = MSHV_GPAP_ACCESS_TYPE_DIRTY;
+        args.access_op   = MSHV_GPAP_ACCESS_OP_CLEAR;
+        args.page_count  = chunk;
+        args.gpap_base   = base_pfn + done;
+        args.bitmap_ptr  = (uint64_t)dst;
+
+        if (ioctl(vm_fd, MSHV_GET_GPAP_ACCESS_BITMAP, &args) < 0) {
+            error_report("Failed to get dirty log (base_pfn=0x%" PRIx64
+                         " batch=%" PRIu64 "): %s",
+                         base_pfn + done, chunk, strerror(errno));
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * log_global_start hook: switch on dirty page tracking for the VM.
+ *
+ * Returns true on success. On failure @errp is filled in and false is
+ * returned.
+ */
+bool mshv_log_global_start(MemoryListener *listener, Error **errp)
+{
+    int rc = enable_dirty_page_tracking(mshv_state->vm);
+
+    if (rc >= 0) {
+        return true;
+    }
+
+    error_setg_errno(errp, -rc, "Failed to enable dirty page tracking");
+    return false;
+}
+
+/*
+ * Turn off GPA page access tracking for the partition behind @vm_fd by
+ * setting HV_PARTITION_PROPERTY_GPA_PAGE_ACCESS_TRACKING to 0 through a
+ * HVCALL_SET_PARTITION_PROPERTY hypercall.
+ *
+ * Returns 0 on success, or a negative errno value on failure.
+ */
+static int disable_dirty_page_tracking(int vm_fd)
+{
+    int ret, saved_errno;
+    struct hv_input_set_partition_property in = {0};
+    struct mshv_root_hvcall args = {0};
+
+    in.property_code = HV_PARTITION_PROPERTY_GPA_PAGE_ACCESS_TRACKING;
+    in.property_value = 0;
+
+    args.code = HVCALL_SET_PARTITION_PROPERTY;
+    args.in_sz = sizeof(in);
+    args.in_ptr = (uint64_t)&in;
+
+    ret = mshv_hvcall(vm_fd, &args);
+    if (ret < 0) {
+        /* Capture errno first: reporting the error may overwrite it. */
+        saved_errno = errno;
+        error_report("Failed to disable dirty page tracking: %s",
+                     strerror(saved_errno));
+        /* Never return 0 on failure, even if errno was left unset. */
+        return saved_errno > 0 ? -saved_errno : -EIO;
+    }
+
+    return 0;
+}
+
+/*
+ * Issue MSHV_GPAP_ACCESS_OP_SET on the dirty state of @page_count guest
+ * pages starting at @base_pfn, in chunks of at most
+ * MSHV_DIRTY_PAGES_BATCH_SIZE pages.
+ *
+ * Returns 0 on success, -1 as soon as one ioctl fails.
+ */
+static int set_dirty_pages(int vm_fd, uint64_t base_pfn, uint64_t page_count)
+{
+    /*
+     * Scratch buffer sized for one full batch; its contents are never read
+     * here. NOTE(review): it is handed to the ioctl uninitialised, which
+     * presumes the kernel treats it as output only - confirm.
+     */
+    unsigned long scratch[MSHV_DIRTY_PAGES_BATCH_SIZE / BITS_PER_LONG];
+    struct mshv_gpap_access_bitmap args = {0};
+    uint64_t done, chunk;
+
+    for (done = 0; done < page_count; done += chunk) {
+        chunk = MIN(MSHV_DIRTY_PAGES_BATCH_SIZE, page_count - done);
+
+        args.access_type = MSHV_GPAP_ACCESS_TYPE_DIRTY;
+        args.access_op   = MSHV_GPAP_ACCESS_OP_SET;
+        args.page_count  = chunk;
+        args.gpap_base   = base_pfn + done;
+        args.bitmap_ptr  = (uint64_t)scratch;
+
+        if (ioctl(vm_fd, MSHV_GET_GPAP_ACCESS_BITMAP, &args) < 0) {
+            error_report("Failed to set dirty pages (base_pfn=0x%" PRIx64
+                         " batch=%" PRIu64 "): %s",
+                         base_pfn + done, chunk, strerror(errno));
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Range callback used with flatview_for_each_range(): set the dirty bits of
+ * every page in a writable RAM range. Ranges that are not RAM, are
+ * read-only, or are shorter than one MSHV page are skipped.
+ *
+ * @opaque points to an int that receives the error code when setting the
+ * dirty bits fails.
+ *
+ * Returns true to abort the iteration (on error), false to continue.
+ */
+static bool set_dirty_bits_cb(Int128 start, Int128 len, const MemoryRegion *mr,
+                              hwaddr offset_in_region, void *opaque)
+{
+    int ret, *errp = opaque;
+    hwaddr gpa, size;
+    uint64_t page_count, base_pfn;
+
+    /*
+     * Filter first, convert afterwards: a range we skip anyway must not be
+     * narrowed to 64 bit, since int128_get64() is expected to assert on
+     * values that do not fit.
+     */
+    if (!mr->ram || mr->readonly) {
+        return false;
+    }
+
+    gpa = int128_get64(start);
+    size = int128_get64(len);
+    page_count = size >> MSHV_PAGE_SHIFT;
+    base_pfn = gpa >> MSHV_PAGE_SHIFT;
+
+    if (page_count == 0) {
+        return false;
+    }
+
+    ret = set_dirty_pages(mshv_state->vm, base_pfn, page_count);
+
+    /* true aborts the iteration, which is what we want if there's an error */
+    if (ret < 0) {
+        *errp = ret;
+        return true;
+    }
+
+    return false;
+}
+
+/*
+ * log_global_stop hook: walk the flat view of address_space_memory with
+ * set_dirty_bits_cb, then switch dirty page tracking off. A failure while
+ * setting the dirty bits is reported but does not prevent the disable call.
+ */
+void mshv_log_global_stop(MemoryListener *listener)
+{
+    FlatView *view = address_space_to_flatview(&address_space_memory);
+    int cb_err = 0;
+
+    /* MSHV requires all dirty bits to be set before disabling tracking. */
+    flatview_for_each_range(view, set_dirty_bits_cb, &cb_err);
+    if (cb_err < 0) {
+        error_report("Failed to set dirty bits before disabling tracking");
+    }
+
+    disable_dirty_page_tracking(mshv_state->vm);
+}
+
+/*
+ * log_sync hook: read (and clear) the dirty bitmap for the aligned part of
+ * @section and forward it via physical_memory_set_dirty_lebitmap().
+ * Sections that are not RAM, are ROM, or are empty after alignment are
+ * ignored. If the bitmap cannot be fetched, nothing is forwarded.
+ */
+void mshv_log_sync(MemoryListener *listener, MemoryRegionSection *section)
+{
+    MemoryRegion *mr = section->mr;
+    hwaddr start_addr, size, delta;
+    uint64_t npages, first_pfn;
+    unsigned long *bitmap;
+    ram_addr_t ram_addr;
+    size_t nbytes;
+    int ret;
+
+    if (!memory_region_is_ram(mr) || memory_region_is_rom(mr)) {
+        return;
+    }
+
+    size = align_section(section, &start_addr);
+    if (size == 0) {
+        return;
+    }
+
+    npages = size >> MSHV_PAGE_SHIFT;
+    first_pfn = start_addr >> MSHV_PAGE_SHIFT;
+
+    /* One bit per page, rounded up to whole longs. */
+    nbytes = ROUND_UP(npages, BITS_PER_LONG) / 8;
+    bitmap = g_malloc0(nbytes);
+
+    ret = get_dirty_log(mshv_state->vm, first_pfn, npages, bitmap, nbytes);
+    if (ret >= 0) {
+        /* Distance of the aligned start from the section's own start. */
+        delta = start_addr - section->offset_within_address_space;
+        ram_addr = memory_region_get_ram_addr(mr) +
+                   section->offset_within_region + delta;
+
+        physical_memory_set_dirty_lebitmap(bitmap, ram_addr, npages);
+    }
+
+    g_free(bitmap);
+}
diff --git a/accel/mshv/mshv-all.c b/accel/mshv/mshv-all.c
index ffe84d6151..94ff9cdb49 100644
--- a/accel/mshv/mshv-all.c
+++ b/accel/mshv/mshv-all.c
@@ -546,6 +546,9 @@ static MemoryListener mshv_memory_listener = {
     .region_del = mem_region_del,
     .eventfd_add = mem_ioeventfd_add,
     .eventfd_del = mem_ioeventfd_del,
+    .log_sync = mshv_log_sync,
+    .log_global_start = mshv_log_global_start,
+    .log_global_stop = mshv_log_global_stop,
 };
 
 static MemoryListener mshv_io_listener = {
diff --git a/include/system/mshv_int.h b/include/system/mshv_int.h
index c24efc8675..ddbdd76076 100644
--- a/include/system/mshv_int.h
+++ b/include/system/mshv_int.h
@@ -31,6 +31,8 @@ struct mshv_get_set_vp_state;
 #define MSHV_HV_INTERRUPTION_TYPE_PRIV_SW_EXC 5
 #define MSHV_HV_INTERRUPTION_TYPE_SW_EXC      6
 
+#define MSHV_DIRTY_PAGES_BATCH_SIZE 0x10000
+
 typedef struct hyperv_message hv_message;
 
 typedef struct MshvHvCallArgs {
@@ -128,6 +130,9 @@ int mshv_guest_mem_write(uint64_t gpa, const uint8_t *data, uintptr_t size,
                          bool is_secure_mode);
 void mshv_set_phys_mem(MshvMemoryListener *mml, MemoryRegionSection *section,
                        bool add);
+void mshv_log_sync(MemoryListener *listener, MemoryRegionSection *section);
+bool mshv_log_global_start(MemoryListener *listener, Error **errp);
+void mshv_log_global_stop(MemoryListener *listener);
 
 /* msr */
 int mshv_init_msrs(const CPUState *cpu);
-- 
2.34.1


      parent reply	other threads:[~2026-04-17 10:58 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-17 10:55 [PATCH 00/34] Add migration support to the MSHV accelerator Magnus Kulke
2026-04-17 10:55 ` [PATCH 01/34] target/i386/mshv: use arch_load/store_reg fns Magnus Kulke
2026-04-17 10:55 ` [PATCH 02/34] target/i386/mshv: use generic FPU/xcr0 state Magnus Kulke
2026-04-17 10:55 ` [PATCH 03/34] target/i386/mshv: impl init/load/store_vcpu_state Magnus Kulke
2026-04-17 10:55 ` [PATCH 04/34] accel/accel-irq: add AccelRouteChange abstraction Magnus Kulke
2026-04-17 10:55 ` [PATCH 05/34] accel/accel-irq: add generic begin_route_changes Magnus Kulke
2026-04-17 10:55 ` [PATCH 06/34] accel/accel-irq: add generic commit_route_changes Magnus Kulke
2026-04-17 10:55 ` [PATCH 07/34] accel/mshv: add irq_routes to state Magnus Kulke
2026-04-17 10:55 ` [PATCH 08/34] accel/mshv: update s->irq_routes in add_msi_route Magnus Kulke
2026-04-17 10:55 ` [PATCH 09/34] accel/mshv: update s->irq_routes in update_msi_route Magnus Kulke
2026-04-17 10:55 ` [PATCH 10/34] accel/mshv: update s->irq_routes in release_virq Magnus Kulke
2026-04-17 10:55 ` [PATCH 11/34] accel/mshv: use s->irq_routes in commit_routes Magnus Kulke
2026-04-17 10:55 ` [PATCH 12/34] accel/mshv: reserve ioapic routes on s->irq_routes Magnus Kulke
2026-04-17 10:55 ` [PATCH 13/34] accel/mshv: remove redundant msi controller Magnus Kulke
2026-04-17 10:55 ` [PATCH 14/34] target/i386/mshv: move apic logic into own file Magnus Kulke
2026-04-17 10:55 ` [PATCH 15/34] target/i386/mshv: remove redundant apic helpers Magnus Kulke
2026-04-17 10:56 ` [PATCH 16/34] target/i386/mshv: migrate LAPIC state Magnus Kulke
2026-04-17 11:54   ` Mohamed Mediouni
2026-04-20 11:37     ` Magnus Kulke
2026-04-17 10:56 ` [PATCH 17/34] target/i386/mshv: move msr code to arch Magnus Kulke
2026-04-17 10:56 ` [PATCH 18/34] accel/mshv: store partition proc features Magnus Kulke
2026-04-17 10:56 ` [PATCH 19/34] target/i386/mshv: expose msvh_get_generic_regs Magnus Kulke
2026-04-17 10:56 ` [PATCH 20/34] target/i386/mshv: migrate MSRs Magnus Kulke
2026-04-17 10:56 ` [PATCH 21/34] target/i386/mshv: migrate MTRR MSRs Magnus Kulke
2026-04-17 10:56 ` [PATCH 22/34] target/i386/mshv: migrate Synic SINT MSRs Magnus Kulke
2026-04-17 10:56 ` [PATCH 23/34] target/i386/mshv: migrate CET/SS MSRs Magnus Kulke
2026-04-17 10:56 ` [PATCH 24/34] target/i386/mshv: migrate SIMP and SIEFP state Magnus Kulke
2026-04-17 10:56 ` [PATCH 25/34] target/i386/mshv: migrate STIMER state Magnus Kulke
2026-04-17 10:56 ` [PATCH 26/34] accel/mshv: introduce SaveVMHandler Magnus Kulke
2026-04-17 10:56 ` [PATCH 27/34] accel/mshv: write synthetic MSRs after migration Magnus Kulke
2026-04-17 10:56 ` [PATCH 28/34] accel/mshv: migrate REFERENCE_TIME Magnus Kulke
2026-04-17 10:56 ` [PATCH 29/34] target/i386/mshv: migrate pending ints/excs Magnus Kulke
2026-04-17 10:56 ` [PATCH 30/34] target/i386: add de/compaction to xsave_helper Magnus Kulke
2026-04-17 11:56   ` Mohamed Mediouni
2026-04-18 17:46   ` Mohamed Mediouni
2026-04-20 12:02     ` Magnus Kulke
2026-04-17 10:56 ` [PATCH 31/34] target/i386/mshv: migrate XSAVE state Magnus Kulke
2026-04-17 10:56 ` [PATCH 32/34] target/i386/mshv: reconstruct hflags after load Magnus Kulke
2026-04-17 10:56 ` [PATCH 33/34] target/i386/mshv: migrate MP_STATE Magnus Kulke
2026-04-17 10:56 ` Magnus Kulke [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260417105618.3621-35-magnuskulke@linux.microsoft.com \
    --to=magnuskulke@linux.microsoft.com \
    --cc=alex@shazbot.org \
    --cc=clg@redhat.com \
    --cc=kvm@vger.kernel.org \
    --cc=liuwe@microsoft.com \
    --cc=magnuskulke@microsoft.com \
    --cc=marcel.apfelbaum@gmail.com \
    --cc=mst@redhat.com \
    --cc=mtosatti@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=philmd@linaro.org \
    --cc=qemu-devel@nongnu.org \
    --cc=richard.henderson@linaro.org \
    --cc=wei.liu@kernel.org \
    --cc=zhao1.liu@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox