From: Ani Sinha <anisinha@redhat.com>
To: Paolo Bonzini <pbonzini@redhat.com>
Cc: vkuznets@redhat.com, kraxel@redhat.com, qemu-devel@nongnu.org,
Ani Sinha <anisinha@redhat.com>,
kvm@vger.kernel.org
Subject: [PATCH v1 11/28] accel/kvm: rebind current VCPUs to the new KVM VM file descriptor upon reset
Date: Fri, 12 Dec 2025 20:33:39 +0530 [thread overview]
Message-ID: <20251212150359.548787-12-anisinha@redhat.com> (raw)
In-Reply-To: <20251212150359.548787-1-anisinha@redhat.com>
Confidential guests needs to generate a new KVM file descriptor upon virtual
machine reset. Existing VCPUs needs to be reattached to this new
KVM VM file descriptor. As a part of this, new VCPU file descriptors against
this new KVM VM file descriptor needs to be created and re-initialized.
Resources allocated against the old VCPU fds needs to be released. This change
makes this happen.
Signed-off-by: Ani Sinha <anisinha@redhat.com>
---
accel/kvm/kvm-all.c | 201 ++++++++++++++++++++++++++++++++++++--------
1 file changed, 166 insertions(+), 35 deletions(-)
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 5b854c9866..638f193626 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -130,6 +130,12 @@ static NotifierWithReturnList register_vmfd_changed_notifiers =
static NotifierWithReturnList register_vmfd_pre_change_notifiers =
NOTIFIER_WITH_RETURN_LIST_INITIALIZER(register_vmfd_pre_change_notifiers);
+static int kvm_rebind_vcpus(Error **errp);
+
+static int map_kvm_run(KVMState *s, CPUState *cpu, Error **errp);
+static int map_kvm_dirty_gfns(KVMState *s, CPUState *cpu, Error **errp);
+static int vcpu_unmap_regions(KVMState *s, CPUState *cpu);
+
struct KVMResampleFd {
int gsi;
EventNotifier *resample_event;
@@ -423,6 +429,82 @@ err:
return ret;
}
+static int kvm_rebind_vcpus(Error **errp)
+{
+ CPUState *cpu;
+ unsigned long vcpu_id;
+ KVMState *s = kvm_state;
+ int kvm_fd, ret = 0;
+
+ CPU_FOREACH(cpu) {
+ vcpu_id = kvm_arch_vcpu_id(cpu);
+
+ if (cpu->kvm_fd) {
+ close(cpu->kvm_fd);
+ }
+
+ ret = kvm_arch_destroy_vcpu(cpu);
+ if (ret < 0) {
+ goto err;
+ }
+
+ if (s->coalesced_mmio_ring == (void *)cpu->kvm_run + PAGE_SIZE) {
+ s->coalesced_mmio_ring = NULL;
+ }
+
+ ret = vcpu_unmap_regions(s, cpu);
+ if (ret < 0) {
+ goto err;
+ }
+
+ ret = kvm_arch_pre_create_vcpu(cpu, errp);
+ if (ret < 0) {
+ goto err;
+ }
+
+ kvm_fd = kvm_vm_ioctl(s, KVM_CREATE_VCPU, vcpu_id);
+ if (kvm_fd < 0) {
+ error_report("KVM_CREATE_VCPU IOCTL failed for vCPU %lu (%s)",
+ vcpu_id, strerror(kvm_fd));
+ return kvm_fd;
+ }
+
+ cpu->kvm_fd = kvm_fd;
+
+ cpu->vcpu_dirty = false;
+ cpu->dirty_pages = 0;
+ cpu->throttle_us_per_full = 0;
+
+ ret = map_kvm_run(s, cpu, errp);
+ if (ret < 0) {
+ goto err;
+ }
+
+ if (s->kvm_dirty_ring_size) {
+ ret = map_kvm_dirty_gfns(s, cpu, errp);
+ if (ret < 0) {
+ goto err;
+ }
+ }
+
+ ret = kvm_arch_init_vcpu(cpu);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret,
+ "kvm_init_vcpu: kvm_arch_init_vcpu failed (%lu)",
+ vcpu_id);
+ }
+
+ close(cpu->kvm_vcpu_stats_fd);
+ cpu->kvm_vcpu_stats_fd = kvm_vcpu_ioctl(cpu, KVM_GET_STATS_FD, NULL);
+ kvm_init_cpu_signals(cpu);
+
+ kvm_cpu_synchronize_post_init(cpu);
+ }
+
+ err:
+ return ret;
+}
+
static void kvm_park_vcpu(CPUState *cpu)
{
struct KVMParkedVcpu *vcpu;
@@ -511,19 +593,11 @@ int kvm_create_and_park_vcpu(CPUState *cpu)
return ret;
}
-static int do_kvm_destroy_vcpu(CPUState *cpu)
+static int vcpu_unmap_regions(KVMState *s, CPUState *cpu)
{
- KVMState *s = kvm_state;
int mmap_size;
int ret = 0;
- trace_kvm_destroy_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
-
- ret = kvm_arch_destroy_vcpu(cpu);
- if (ret < 0) {
- goto err;
- }
-
mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
if (mmap_size < 0) {
ret = mmap_size;
@@ -551,39 +625,47 @@ static int do_kvm_destroy_vcpu(CPUState *cpu)
cpu->kvm_dirty_gfns = NULL;
}
- kvm_park_vcpu(cpu);
-err:
+ err:
return ret;
}
-void kvm_destroy_vcpu(CPUState *cpu)
-{
- if (do_kvm_destroy_vcpu(cpu) < 0) {
- error_report("kvm_destroy_vcpu failed");
- exit(EXIT_FAILURE);
- }
-}
-
-int kvm_init_vcpu(CPUState *cpu, Error **errp)
+static int do_kvm_destroy_vcpu(CPUState *cpu)
{
KVMState *s = kvm_state;
- int mmap_size;
- int ret;
+ int ret = 0;
- trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
+ trace_kvm_destroy_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
- ret = kvm_arch_pre_create_vcpu(cpu, errp);
+ ret = kvm_arch_destroy_vcpu(cpu);
if (ret < 0) {
goto err;
}
- ret = kvm_create_vcpu(cpu);
+ /* If I am the CPU that created coalesced_mmio_ring, then discard it */
+ if (s->coalesced_mmio_ring == (void *)cpu->kvm_run + PAGE_SIZE) {
+ s->coalesced_mmio_ring = NULL;
+ }
+
+ ret = vcpu_unmap_regions(s, cpu);
if (ret < 0) {
- error_setg_errno(errp, -ret,
- "kvm_init_vcpu: kvm_create_vcpu failed (%lu)",
- kvm_arch_vcpu_id(cpu));
goto err;
}
+ kvm_park_vcpu(cpu);
+err:
+ return ret;
+}
+
+void kvm_destroy_vcpu(CPUState *cpu)
+{
+ if (do_kvm_destroy_vcpu(cpu) < 0) {
+ error_report("kvm_destroy_vcpu failed");
+ exit(EXIT_FAILURE);
+ }
+}
+
+static int map_kvm_run(KVMState *s, CPUState *cpu, Error **errp)
+{
+ int mmap_size, ret = 0;
mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
if (mmap_size < 0) {
@@ -608,14 +690,53 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp)
(void *)cpu->kvm_run + s->coalesced_mmio * PAGE_SIZE;
}
+ err:
+ return ret;
+}
+
+static int map_kvm_dirty_gfns(KVMState *s, CPUState *cpu, Error **errp)
+{
+ int ret = 0;
+ /* Use MAP_SHARED to share pages with the kernel */
+ cpu->kvm_dirty_gfns = mmap(NULL, s->kvm_dirty_ring_bytes,
+ PROT_READ | PROT_WRITE, MAP_SHARED,
+ cpu->kvm_fd,
+ PAGE_SIZE * KVM_DIRTY_LOG_PAGE_OFFSET);
+ if (cpu->kvm_dirty_gfns == MAP_FAILED) {
+ ret = -errno;
+ }
+
+ return ret;
+}
+
+int kvm_init_vcpu(CPUState *cpu, Error **errp)
+{
+ KVMState *s = kvm_state;
+ int ret;
+
+ trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
+
+ ret = kvm_arch_pre_create_vcpu(cpu, errp);
+ if (ret < 0) {
+ goto err;
+ }
+
+ ret = kvm_create_vcpu(cpu);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret,
+ "kvm_init_vcpu: kvm_create_vcpu failed (%lu)",
+ kvm_arch_vcpu_id(cpu));
+ goto err;
+ }
+
+ ret = map_kvm_run(s, cpu, errp);
+ if (ret < 0) {
+ goto err;
+ }
+
if (s->kvm_dirty_ring_size) {
- /* Use MAP_SHARED to share pages with the kernel */
- cpu->kvm_dirty_gfns = mmap(NULL, s->kvm_dirty_ring_bytes,
- PROT_READ | PROT_WRITE, MAP_SHARED,
- cpu->kvm_fd,
- PAGE_SIZE * KVM_DIRTY_LOG_PAGE_OFFSET);
- if (cpu->kvm_dirty_gfns == MAP_FAILED) {
- ret = -errno;
+ ret = map_kvm_dirty_gfns(s, cpu, errp);
+ if (ret < 0) {
goto err;
}
}
@@ -2716,6 +2837,16 @@ static int kvm_reset_vmfd(MachineState *ms)
}
assert(!err);
+ /*
+ * rebind new vcpu fds with the new kvm fds
+ * These can only be called after kvm_arch_vmfd_change_ops()
+ */
+ ret = kvm_rebind_vcpus(&err);
+ if (ret < 0) {
+ return ret;
+ }
+ assert(!err);
+
/* these can be only called after ram_block_rebind() */
memory_listener_register(&kml->listener, &address_space_memory);
memory_listener_register(&kvm_io_listener, &address_space_io);
--
2.42.0
next prev parent reply other threads:[~2025-12-12 15:09 UTC|newest]
Thread overview: 29+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-12-12 15:03 [PATCH v1 00/28] Introduce support for confidential guest reset Ani Sinha
2025-12-12 15:03 ` [PATCH v1 01/28] i386/kvm: avoid installing duplicate msr entries in msr_handlers Ani Sinha
2025-12-12 15:03 ` [PATCH v1 02/28] hw/accel: add a per-accelerator callback to change VM accelerator handle Ani Sinha
2025-12-12 15:03 ` [PATCH v1 03/28] system/physmem: add helper to reattach existing memory after KVM VM fd change Ani Sinha
2025-12-12 15:03 ` [PATCH v1 04/28] accel/kvm: add changes required to support KVM VM file descriptor change Ani Sinha
2025-12-12 15:03 ` [PATCH v1 05/28] accel/kvm: mark guest state as unprotected after vm " Ani Sinha
2025-12-12 15:03 ` [PATCH v1 06/28] accel/kvm: add a notifier to indicate KVM VM file descriptor has changed Ani Sinha
2025-12-12 15:03 ` [PATCH v1 07/28] kvm/i386: implement architecture support for kvm file descriptor change Ani Sinha
2025-12-12 15:03 ` [PATCH v1 08/28] hw/i386: refactor x86_bios_rom_init for reuse in confidential guest reset Ani Sinha
2025-12-12 15:03 ` [PATCH v1 09/28] kvm/i386: reload firmware for " Ani Sinha
2025-12-12 15:03 ` [PATCH v1 10/28] accel/kvm: Add notifier to inform that the KVM VM file fd is about to be changed Ani Sinha
2025-12-12 15:03 ` Ani Sinha [this message]
2025-12-12 15:03 ` [PATCH v1 12/28] i386/tdx: refactor TDX firmware memory initialization code into a new function Ani Sinha
2025-12-12 15:03 ` [PATCH v1 13/28] i386/tdx: finalize TDX guest state upon reset Ani Sinha
2025-12-12 15:03 ` [PATCH v1 14/28] i386/tdx: add a pre-vmfd change notifier to reset tdx state Ani Sinha
2025-12-12 15:03 ` [PATCH v1 15/28] i386/sev: add migration blockers only once Ani Sinha
2025-12-12 15:03 ` [PATCH v1 16/28] i386/sev: add notifiers " Ani Sinha
2025-12-12 15:03 ` [PATCH v1 17/28] i386/sev: free existing launch update data and kernel hashes data on init Ani Sinha
2025-12-12 15:03 ` [PATCH v1 18/28] i386/sev: add support for confidential guest reset Ani Sinha
2025-12-12 15:03 ` [PATCH v1 19/28] hw/vfio: generate new file fd for pseudo device and rebind existing descriptors Ani Sinha
2025-12-12 15:03 ` [PATCH v1 20/28] kvm/i8254: add support for confidential guest reset Ani Sinha
2025-12-12 15:03 ` [PATCH v1 21/28] hw/hyperv/vmbus: " Ani Sinha
2025-12-12 15:03 ` [PATCH v1 22/28] accel/kvm: add a per-confidential class callback to unlock guest state Ani Sinha
2025-12-12 15:03 ` [PATCH v1 23/28] kvm/xen-emu: re-initialize capabilities during confidential guest reset Ani Sinha
2025-12-12 15:03 ` [PATCH v1 24/28] kvm/xen_evtchn: add support for " Ani Sinha
2025-12-12 15:03 ` [PATCH v1 25/28] ppc/openpic: create a new openpic device and reattach mem region on coco reset Ani Sinha
2025-12-12 15:03 ` [PATCH v1 26/28] kvm/vcpu: add notifiers to inform vcpu file descriptor change Ani Sinha
2025-12-12 15:03 ` [PATCH v1 27/28] kvm/i386/apic: set local apic after vcpu file descriptors changed Ani Sinha
2025-12-12 15:03 ` [PATCH v1 28/28] kvm/clock: add support for confidential guest reset Ani Sinha
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251212150359.548787-12-anisinha@redhat.com \
--to=anisinha@redhat.com \
--cc=kraxel@redhat.com \
--cc=kvm@vger.kernel.org \
--cc=pbonzini@redhat.com \
--cc=qemu-devel@nongnu.org \
--cc=vkuznets@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).