[PATCH v1 11/28] accel/kvm: rebind current VCPUs to the new KVM VM file descriptor upon reset

qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed

From: Ani Sinha <anisinha@redhat.com>
To: Paolo Bonzini <pbonzini@redhat.com>
Cc: vkuznets@redhat.com, kraxel@redhat.com, qemu-devel@nongnu.org,
	Ani Sinha <anisinha@redhat.com>,
	kvm@vger.kernel.org
Subject: [PATCH v1 11/28] accel/kvm: rebind current VCPUs to the new KVM VM file descriptor upon reset
Date: Fri, 12 Dec 2025 20:33:39 +0530	[thread overview]
Message-ID: <20251212150359.548787-12-anisinha@redhat.com> (raw)
In-Reply-To: <20251212150359.548787-1-anisinha@redhat.com>

Confidential guests needs to generate a new KVM file descriptor upon virtual
machine reset. Existing VCPUs needs to be reattached to this new
KVM VM file descriptor. As a part of this, new VCPU file descriptors against
this new KVM VM file descriptor needs to be created and re-initialized.
Resources allocated against the old VCPU fds needs to be released. This change
makes this happen.

Signed-off-by: Ani Sinha <anisinha@redhat.com>
---
 accel/kvm/kvm-all.c | 201 ++++++++++++++++++++++++++++++++++++--------
 1 file changed, 166 insertions(+), 35 deletions(-)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 5b854c9866..638f193626 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -130,6 +130,12 @@ static NotifierWithReturnList register_vmfd_changed_notifiers =
 static NotifierWithReturnList register_vmfd_pre_change_notifiers =
     NOTIFIER_WITH_RETURN_LIST_INITIALIZER(register_vmfd_pre_change_notifiers);
 
+static int kvm_rebind_vcpus(Error **errp);
+
+static int map_kvm_run(KVMState *s, CPUState *cpu, Error **errp);
+static int map_kvm_dirty_gfns(KVMState *s, CPUState *cpu, Error **errp);
+static int vcpu_unmap_regions(KVMState *s, CPUState *cpu);
+
 struct KVMResampleFd {
     int gsi;
     EventNotifier *resample_event;
@@ -423,6 +429,82 @@ err:
     return ret;
 }
 
+static int kvm_rebind_vcpus(Error **errp)
+{
+    CPUState *cpu;
+    unsigned long vcpu_id;
+    KVMState *s = kvm_state;
+    int kvm_fd, ret = 0;
+
+    CPU_FOREACH(cpu) {
+        vcpu_id = kvm_arch_vcpu_id(cpu);
+
+        if (cpu->kvm_fd) {
+            close(cpu->kvm_fd);
+        }
+
+        ret = kvm_arch_destroy_vcpu(cpu);
+        if (ret < 0) {
+            goto err;
+        }
+
+        if (s->coalesced_mmio_ring == (void *)cpu->kvm_run + PAGE_SIZE) {
+            s->coalesced_mmio_ring = NULL;
+        }
+
+        ret = vcpu_unmap_regions(s, cpu);
+        if (ret < 0) {
+            goto err;
+        }
+
+        ret = kvm_arch_pre_create_vcpu(cpu, errp);
+        if (ret < 0) {
+            goto err;
+        }
+
+        kvm_fd = kvm_vm_ioctl(s, KVM_CREATE_VCPU, vcpu_id);
+        if (kvm_fd < 0) {
+            error_report("KVM_CREATE_VCPU IOCTL failed for vCPU %lu (%s)",
+                         vcpu_id, strerror(kvm_fd));
+            return kvm_fd;
+        }
+
+        cpu->kvm_fd = kvm_fd;
+
+        cpu->vcpu_dirty = false;
+        cpu->dirty_pages = 0;
+        cpu->throttle_us_per_full = 0;
+
+        ret = map_kvm_run(s, cpu, errp);
+        if (ret < 0) {
+            goto err;
+        }
+
+        if (s->kvm_dirty_ring_size) {
+            ret = map_kvm_dirty_gfns(s, cpu, errp);
+            if (ret < 0) {
+                goto err;
+            }
+        }
+
+        ret = kvm_arch_init_vcpu(cpu);
+        if (ret < 0) {
+            error_setg_errno(errp, -ret,
+                             "kvm_init_vcpu: kvm_arch_init_vcpu failed (%lu)",
+                             vcpu_id);
+        }
+
+        close(cpu->kvm_vcpu_stats_fd);
+        cpu->kvm_vcpu_stats_fd = kvm_vcpu_ioctl(cpu, KVM_GET_STATS_FD, NULL);
+        kvm_init_cpu_signals(cpu);
+
+        kvm_cpu_synchronize_post_init(cpu);
+    }
+
+ err:
+    return ret;
+}
+
 static void kvm_park_vcpu(CPUState *cpu)
 {
     struct KVMParkedVcpu *vcpu;
@@ -511,19 +593,11 @@ int kvm_create_and_park_vcpu(CPUState *cpu)
     return ret;
 }
 
-static int do_kvm_destroy_vcpu(CPUState *cpu)
+static int vcpu_unmap_regions(KVMState *s, CPUState *cpu)
 {
-    KVMState *s = kvm_state;
     int mmap_size;
     int ret = 0;
 
-    trace_kvm_destroy_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
-
-    ret = kvm_arch_destroy_vcpu(cpu);
-    if (ret < 0) {
-        goto err;
-    }
-
     mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
     if (mmap_size < 0) {
         ret = mmap_size;
@@ -551,39 +625,47 @@ static int do_kvm_destroy_vcpu(CPUState *cpu)
         cpu->kvm_dirty_gfns = NULL;
     }
 
-    kvm_park_vcpu(cpu);
-err:
+ err:
     return ret;
 }
 
-void kvm_destroy_vcpu(CPUState *cpu)
-{
-    if (do_kvm_destroy_vcpu(cpu) < 0) {
-        error_report("kvm_destroy_vcpu failed");
-        exit(EXIT_FAILURE);
-    }
-}
-
-int kvm_init_vcpu(CPUState *cpu, Error **errp)
+static int do_kvm_destroy_vcpu(CPUState *cpu)
 {
     KVMState *s = kvm_state;
-    int mmap_size;
-    int ret;
+    int ret = 0;
 
-    trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
+    trace_kvm_destroy_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
 
-    ret = kvm_arch_pre_create_vcpu(cpu, errp);
+    ret = kvm_arch_destroy_vcpu(cpu);
     if (ret < 0) {
         goto err;
     }
 
-    ret = kvm_create_vcpu(cpu);
+    /* If I am the CPU that created coalesced_mmio_ring, then discard it */
+    if (s->coalesced_mmio_ring == (void *)cpu->kvm_run + PAGE_SIZE) {
+        s->coalesced_mmio_ring = NULL;
+    }
+
+    ret = vcpu_unmap_regions(s, cpu);
     if (ret < 0) {
-        error_setg_errno(errp, -ret,
-                         "kvm_init_vcpu: kvm_create_vcpu failed (%lu)",
-                         kvm_arch_vcpu_id(cpu));
         goto err;
     }
+    kvm_park_vcpu(cpu);
+err:
+    return ret;
+}
+
+void kvm_destroy_vcpu(CPUState *cpu)
+{
+    if (do_kvm_destroy_vcpu(cpu) < 0) {
+        error_report("kvm_destroy_vcpu failed");
+        exit(EXIT_FAILURE);
+    }
+}
+
+static int map_kvm_run(KVMState *s, CPUState *cpu, Error **errp)
+{
+    int mmap_size, ret = 0;
 
     mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
     if (mmap_size < 0) {
@@ -608,14 +690,53 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp)
             (void *)cpu->kvm_run + s->coalesced_mmio * PAGE_SIZE;
     }
 
+ err:
+    return ret;
+}
+
+static int map_kvm_dirty_gfns(KVMState *s, CPUState *cpu, Error **errp)
+{
+    int ret = 0;
+    /* Use MAP_SHARED to share pages with the kernel */
+    cpu->kvm_dirty_gfns = mmap(NULL, s->kvm_dirty_ring_bytes,
+                               PROT_READ | PROT_WRITE, MAP_SHARED,
+                               cpu->kvm_fd,
+                               PAGE_SIZE * KVM_DIRTY_LOG_PAGE_OFFSET);
+    if (cpu->kvm_dirty_gfns == MAP_FAILED) {
+        ret = -errno;
+    }
+
+    return ret;
+}
+
+int kvm_init_vcpu(CPUState *cpu, Error **errp)
+{
+    KVMState *s = kvm_state;
+    int ret;
+
+    trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
+
+    ret = kvm_arch_pre_create_vcpu(cpu, errp);
+    if (ret < 0) {
+        goto err;
+    }
+
+    ret = kvm_create_vcpu(cpu);
+    if (ret < 0) {
+        error_setg_errno(errp, -ret,
+                         "kvm_init_vcpu: kvm_create_vcpu failed (%lu)",
+                         kvm_arch_vcpu_id(cpu));
+        goto err;
+    }
+
+    ret = map_kvm_run(s, cpu, errp);
+    if (ret < 0) {
+        goto err;
+    }
+
     if (s->kvm_dirty_ring_size) {
-        /* Use MAP_SHARED to share pages with the kernel */
-        cpu->kvm_dirty_gfns = mmap(NULL, s->kvm_dirty_ring_bytes,
-                                   PROT_READ | PROT_WRITE, MAP_SHARED,
-                                   cpu->kvm_fd,
-                                   PAGE_SIZE * KVM_DIRTY_LOG_PAGE_OFFSET);
-        if (cpu->kvm_dirty_gfns == MAP_FAILED) {
-            ret = -errno;
+        ret = map_kvm_dirty_gfns(s, cpu, errp);
+        if (ret < 0) {
             goto err;
         }
     }
@@ -2716,6 +2837,16 @@ static int kvm_reset_vmfd(MachineState *ms)
     }
     assert(!err);
 
+    /*
+     * rebind new vcpu fds with the new kvm fds
+     * These can only be called after kvm_arch_vmfd_change_ops()
+     */
+    ret = kvm_rebind_vcpus(&err);
+    if (ret < 0) {
+        return ret;
+    }
+    assert(!err);
+
     /* these can be only called after ram_block_rebind() */
     memory_listener_register(&kml->listener, &address_space_memory);
     memory_listener_register(&kvm_io_listener, &address_space_io);
-- 
2.42.0

next prev parent reply	other threads:[~2025-12-12 15:09 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-12-12 15:03 [PATCH v1 00/28] Introduce support for confidential guest reset Ani Sinha
2025-12-12 15:03 ` [PATCH v1 01/28] i386/kvm: avoid installing duplicate msr entries in msr_handlers Ani Sinha
2025-12-12 15:03 ` [PATCH v1 02/28] hw/accel: add a per-accelerator callback to change VM accelerator handle Ani Sinha
2025-12-12 15:03 ` [PATCH v1 03/28] system/physmem: add helper to reattach existing memory after KVM VM fd change Ani Sinha
2025-12-12 15:03 ` [PATCH v1 04/28] accel/kvm: add changes required to support KVM VM file descriptor change Ani Sinha
2025-12-12 15:03 ` [PATCH v1 05/28] accel/kvm: mark guest state as unprotected after vm " Ani Sinha
2025-12-12 15:03 ` [PATCH v1 06/28] accel/kvm: add a notifier to indicate KVM VM file descriptor has changed Ani Sinha
2025-12-12 15:03 ` [PATCH v1 07/28] kvm/i386: implement architecture support for kvm file descriptor change Ani Sinha
2025-12-12 15:03 ` [PATCH v1 08/28] hw/i386: refactor x86_bios_rom_init for reuse in confidential guest reset Ani Sinha
2025-12-12 15:03 ` [PATCH v1 09/28] kvm/i386: reload firmware for " Ani Sinha
2025-12-12 15:03 ` [PATCH v1 10/28] accel/kvm: Add notifier to inform that the KVM VM file fd is about to be changed Ani Sinha
2025-12-12 15:03 ` Ani Sinha [this message]
2025-12-12 15:03 ` [PATCH v1 12/28] i386/tdx: refactor TDX firmware memory initialization code into a new function Ani Sinha
2025-12-12 15:03 ` [PATCH v1 13/28] i386/tdx: finalize TDX guest state upon reset Ani Sinha
2025-12-12 15:03 ` [PATCH v1 14/28] i386/tdx: add a pre-vmfd change notifier to reset tdx state Ani Sinha
2025-12-12 15:03 ` [PATCH v1 15/28] i386/sev: add migration blockers only once Ani Sinha
2025-12-12 15:03 ` [PATCH v1 16/28] i386/sev: add notifiers " Ani Sinha
2025-12-12 15:03 ` [PATCH v1 17/28] i386/sev: free existing launch update data and kernel hashes data on init Ani Sinha
2025-12-12 15:03 ` [PATCH v1 18/28] i386/sev: add support for confidential guest reset Ani Sinha
2025-12-12 15:03 ` [PATCH v1 19/28] hw/vfio: generate new file fd for pseudo device and rebind existing descriptors Ani Sinha
2025-12-12 15:03 ` [PATCH v1 20/28] kvm/i8254: add support for confidential guest reset Ani Sinha
2025-12-12 15:03 ` [PATCH v1 21/28] hw/hyperv/vmbus: " Ani Sinha
2025-12-12 15:03 ` [PATCH v1 22/28] accel/kvm: add a per-confidential class callback to unlock guest state Ani Sinha
2025-12-12 15:03 ` [PATCH v1 23/28] kvm/xen-emu: re-initialize capabilities during confidential guest reset Ani Sinha
2025-12-12 15:03 ` [PATCH v1 24/28] kvm/xen_evtchn: add support for " Ani Sinha
2025-12-12 15:03 ` [PATCH v1 25/28] ppc/openpic: create a new openpic device and reattach mem region on coco reset Ani Sinha
2025-12-12 15:03 ` [PATCH v1 26/28] kvm/vcpu: add notifiers to inform vcpu file descriptor change Ani Sinha
2025-12-12 15:03 ` [PATCH v1 27/28] kvm/i386/apic: set local apic after vcpu file descriptors changed Ani Sinha
2025-12-12 15:03 ` [PATCH v1 28/28] kvm/clock: add support for confidential guest reset Ani Sinha

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:5b854c986 dfblob:638f19362 )
 OR (
bs:"[PATCH v1 11/28] accel/kvm: rebind current VCPUs to the new KVM VM file descriptor upon reset" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251212150359.548787-12-anisinha@redhat.com \
    --to=anisinha@redhat.com \
    --cc=kraxel@redhat.com \
    --cc=kvm@vger.kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=vkuznets@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).