* [PATCH 24/60] kvm: Move kvm_vcpu spinloop members to struct kvm_vcpu_common
From: Jörg Rödel @ 2026-06-08 14:42 UTC (permalink / raw)
To: Paolo Bonzini, Sean Christopherson
Cc: Tom Lendacky, ashish.kalra, michael.roth, nsaenz, anelkz,
James.Bottomley, Melody Wang, kvm, linux-kernel, kvmarm,
loongarch, linux-mips, linuxppc-dev, kvm-riscv, x86, coconut-svsm,
joerg.roedel
In-Reply-To: <20260608144252.351443-1-joro@8bytes.org>
From: Joerg Roedel <joerg.roedel@amd.com>
Only one struct kvm_vcpu across all planes can be in a spin-loop.
Move the state to struct kvm_vcpu_common to make detection independent
of the active struct kvm_vcpu. The halt_poll_ns member is moved along
with it.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
include/linux/kvm_host.h | 32 +++++++++++++++----------------
virt/kvm/kvm_main.c | 41 ++++++++++++++++++++++------------------
2 files changed, 39 insertions(+), 34 deletions(-)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 9220c452aa3a..f6e8a0b653b3 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -350,6 +350,20 @@ struct kvm_vcpu_common {
rwlock_t pid_lock;
int sigset_active;
sigset_t sigset;
+ unsigned int halt_poll_ns;
+
+#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
+ /*
+ * Cpu relax intercept or pause loop exit optimization
+ * in_spin_loop: set when a vcpu does a pause loop exit
+ * or cpu relax intercepted.
+ * dy_eligible: indicates whether vcpu is eligible for directed yield.
+ */
+ struct {
+ bool in_spin_loop;
+ bool dy_eligible;
+ } spin_loop;
+#endif
/* Scheduling state */
#ifdef CONFIG_PREEMPT_NOTIFIERS
@@ -373,8 +387,6 @@ struct kvm_vcpu {
struct kvm_run *run;
- unsigned int halt_poll_ns;
-
u64 plane_requests;
/* S390 only */
@@ -398,18 +410,6 @@ struct kvm_vcpu {
} async_pf;
#endif
-#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
- /*
- * Cpu relax intercept or pause loop exit optimization
- * in_spin_loop: set when a vcpu does a pause loop exit
- * or cpu relax intercepted.
- * dy_eligible: indicates whether vcpu is eligible for directed yield.
- */
- struct {
- bool in_spin_loop;
- bool dy_eligible;
- } spin_loop;
-#endif
struct kvm_vcpu_arch arch;
struct kvm_vcpu_stat stat;
char stats_id[KVM_STATS_NAME_SIZE];
@@ -2500,11 +2500,11 @@ extern struct kvm_device_ops kvm_arm_vgic_v5_ops;
static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val)
{
- vcpu->spin_loop.in_spin_loop = val;
+ vcpu->common->spin_loop.in_spin_loop = val;
}
static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val)
{
- vcpu->spin_loop.dy_eligible = val;
+ vcpu->common->spin_loop.dy_eligible = val;
}
#else /* !CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 1858880ee3d3..24ff8748a317 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -485,6 +485,9 @@ static int kvm_vcpu_init_common(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned
vcpu->common = no_free_ptr(common);
+ kvm_vcpu_set_in_spin_loop(vcpu, false);
+ kvm_vcpu_set_dy_eligible(vcpu, false);
+
return 0;
out_drop_counter:
@@ -515,8 +518,6 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
vcpu->vcpu_id = id;
kvm_async_pf_vcpu_init(vcpu);
- kvm_vcpu_set_in_spin_loop(vcpu, false);
- kvm_vcpu_set_dy_eligible(vcpu, false);
vcpu->last_used_slot = NULL;
vcpu->plane_level = 0;
@@ -3721,9 +3722,10 @@ void kvm_sigset_deactivate(struct kvm_vcpu *vcpu)
static void grow_halt_poll_ns(struct kvm_vcpu *vcpu)
{
+ struct kvm_vcpu_common *common = vcpu->common;
unsigned int old, val, grow, grow_start;
- old = val = vcpu->halt_poll_ns;
+ old = val = common->halt_poll_ns;
grow_start = READ_ONCE(halt_poll_ns_grow_start);
grow = READ_ONCE(halt_poll_ns_grow);
if (!grow)
@@ -3733,16 +3735,17 @@ static void grow_halt_poll_ns(struct kvm_vcpu *vcpu)
if (val < grow_start)
val = grow_start;
- vcpu->halt_poll_ns = val;
+ common->halt_poll_ns = val;
out:
trace_kvm_halt_poll_ns_grow(vcpu->vcpu_id, val, old);
}
static void shrink_halt_poll_ns(struct kvm_vcpu *vcpu)
{
+ struct kvm_vcpu_common *common = vcpu->common;
unsigned int old, val, shrink, grow_start;
- old = val = vcpu->halt_poll_ns;
+ old = val = common->halt_poll_ns;
shrink = READ_ONCE(halt_poll_ns_shrink);
grow_start = READ_ONCE(halt_poll_ns_grow_start);
if (shrink == 0)
@@ -3753,7 +3756,7 @@ static void shrink_halt_poll_ns(struct kvm_vcpu *vcpu)
if (val < grow_start)
val = 0;
- vcpu->halt_poll_ns = val;
+ common->halt_poll_ns = val;
trace_kvm_halt_poll_ns_shrink(vcpu->vcpu_id, val, old);
}
@@ -3864,19 +3867,20 @@ void kvm_vcpu_halt(struct kvm_vcpu *vcpu)
{
unsigned int max_halt_poll_ns = kvm_vcpu_max_halt_poll_ns(vcpu);
bool halt_poll_allowed = !kvm_arch_no_poll(vcpu);
+ struct kvm_vcpu_common *common = vcpu->common;
ktime_t start, cur, poll_end;
bool waited = false;
bool do_halt_poll;
u64 halt_ns;
- if (vcpu->halt_poll_ns > max_halt_poll_ns)
- vcpu->halt_poll_ns = max_halt_poll_ns;
+ if (common->halt_poll_ns > max_halt_poll_ns)
+ common->halt_poll_ns = max_halt_poll_ns;
- do_halt_poll = halt_poll_allowed && vcpu->halt_poll_ns;
+ do_halt_poll = halt_poll_allowed && common->halt_poll_ns;
start = cur = poll_end = ktime_get();
if (do_halt_poll) {
- ktime_t stop = ktime_add_ns(start, vcpu->halt_poll_ns);
+ ktime_t stop = ktime_add_ns(start, common->halt_poll_ns);
do {
if (kvm_vcpu_check_block(vcpu) < 0)
@@ -3914,18 +3918,18 @@ void kvm_vcpu_halt(struct kvm_vcpu *vcpu)
if (!vcpu_valid_wakeup(vcpu)) {
shrink_halt_poll_ns(vcpu);
} else if (max_halt_poll_ns) {
- if (halt_ns <= vcpu->halt_poll_ns)
+ if (halt_ns <= common->halt_poll_ns)
;
/* we had a long block, shrink polling */
- else if (vcpu->halt_poll_ns &&
+ else if (common->halt_poll_ns &&
halt_ns > max_halt_poll_ns)
shrink_halt_poll_ns(vcpu);
/* we had a short halt and our poll time is too small */
- else if (vcpu->halt_poll_ns < max_halt_poll_ns &&
+ else if (common->halt_poll_ns < max_halt_poll_ns &&
halt_ns < max_halt_poll_ns)
grow_halt_poll_ns(vcpu);
} else {
- vcpu->halt_poll_ns = 0;
+ common->halt_poll_ns = 0;
}
}
@@ -4046,13 +4050,14 @@ EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_yield_to);
static bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
+ struct kvm_vcpu_common *common = vcpu->common;
bool eligible;
- eligible = !vcpu->spin_loop.in_spin_loop ||
- vcpu->spin_loop.dy_eligible;
+ eligible = !common->spin_loop.in_spin_loop ||
+ common->spin_loop.dy_eligible;
- if (vcpu->spin_loop.in_spin_loop)
- kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible);
+ if (common->spin_loop.in_spin_loop)
+ kvm_vcpu_set_dy_eligible(vcpu, !common->spin_loop.dy_eligible);
return eligible;
#else
--
2.53.0
^ permalink raw reply related
* [PATCH 38/60] kvm: Make KVM_SIGNAL_MSI per plane
From: Jörg Rödel @ 2026-06-08 14:42 UTC (permalink / raw)
To: Paolo Bonzini, Sean Christopherson
Cc: Tom Lendacky, ashish.kalra, michael.roth, nsaenz, anelkz,
James.Bottomley, Melody Wang, kvm, linux-kernel, kvmarm,
loongarch, linux-mips, linuxppc-dev, kvm-riscv, x86, coconut-svsm,
joerg.roedel
In-Reply-To: <20260608144252.351443-1-joro@8bytes.org>
From: Paolo Bonzini <pbonzini@redhat.com>
Allow the KVM_SIGNAL_MSI ioctl for every plane instead of per VM.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Co-developed-by: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
include/linux/kvm_host.h | 2 +-
virt/kvm/irqchip.c | 4 ++--
virt/kvm/kvm_main.c | 25 ++++++++++++++-----------
3 files changed, 17 insertions(+), 14 deletions(-)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index cfb6911d6771..05a10836d92d 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -2370,7 +2370,7 @@ static inline int kvm_init_irq_routing(struct kvm *kvm)
#endif
-int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi);
+int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi, unsigned plane_level);
void kvm_eventfd_init(struct kvm *kvm);
int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args);
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index 14480d1df4f9..a4fea7d8dde6 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -45,7 +45,7 @@ int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin)
return irq_rt->chip[irqchip][pin];
}
-int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi)
+int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi, unsigned plane_level)
{
struct kvm_kernel_irq_routing_entry route;
@@ -57,7 +57,7 @@ int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi)
route.msi.data = msi->data;
route.msi.flags = msi->flags;
route.msi.devid = msi->devid;
- route.msi.plane_level = 0;
+ route.msi.plane_level = plane_level;
return kvm_set_msi(&route, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, false);
}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a30123b77112..dc59f2f9d405 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -4907,6 +4907,17 @@ static long __kvm_plane_ioctl(struct kvm_plane *plane, unsigned int ioctl, unsig
case KVM_CREATE_VCPU:
r = kvm_plane_ioctl_create_vcpu(plane, arg);
break;
+#ifdef CONFIG_HAVE_KVM_MSI
+ case KVM_SIGNAL_MSI: {
+ void __user *argp = (void __user *)arg;
+ struct kvm_msi msi;
+
+ if (copy_from_user(&msi, argp, sizeof(msi)))
+ return -EFAULT;
+ r = kvm_send_userspace_msi(plane->kvm, &msi, plane->level);
+ break;
+ }
+#endif
default:
r = -ENOTTY;
}
@@ -5493,6 +5504,9 @@ static long kvm_vm_ioctl(struct file *filp,
r = kvm_vm_ioctl_create_plane(kvm, arg);
break;
case KVM_CREATE_VCPU:
+#ifdef CONFIG_HAVE_KVM_MSI
+ case KVM_SIGNAL_MSI:
+#endif
r = __kvm_plane_ioctl(kvm->planes[0], ioctl, arg);
break;
case KVM_ENABLE_CAP: {
@@ -5597,17 +5611,6 @@ static long kvm_vm_ioctl(struct file *filp,
r = kvm_ioeventfd(kvm, &data);
break;
}
-#ifdef CONFIG_HAVE_KVM_MSI
- case KVM_SIGNAL_MSI: {
- struct kvm_msi msi;
-
- r = -EFAULT;
- if (copy_from_user(&msi, argp, sizeof(msi)))
- goto out;
- r = kvm_send_userspace_msi(kvm, &msi);
- break;
- }
-#endif
#ifdef __KVM_HAVE_IRQ_LINE
case KVM_IRQ_LINE_STATUS:
case KVM_IRQ_LINE: {
--
2.53.0
^ permalink raw reply related
* [PATCH 50/60] kvm: x86: Make KVM_REQ_UPDATE_PROTECTED_GUEST_STATE per plane
From: Jörg Rödel @ 2026-06-08 14:42 UTC (permalink / raw)
To: Paolo Bonzini, Sean Christopherson
Cc: Tom Lendacky, ashish.kalra, michael.roth, nsaenz, anelkz,
James.Bottomley, Melody Wang, kvm, linux-kernel, kvmarm,
loongarch, linux-mips, linuxppc-dev, kvm-riscv, x86, coconut-svsm,
joerg.roedel
In-Reply-To: <20260608144252.351443-1-joro@8bytes.org>
From: Joerg Roedel <joerg.roedel@amd.com>
The VMSAs are per plane, so this request must be too.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
arch/x86/include/asm/kvm_host.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 0955097aca9c..0327b77e56b7 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -129,7 +129,7 @@
#define KVM_REQ_HV_TLB_FLUSH \
KVM_ARCH_REQ_FLAGS(32, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_UPDATE_PROTECTED_GUEST_STATE \
- KVM_ARCH_REQ_FLAGS(34, KVM_REQUEST_WAIT)
+ KVM_ARCH_REQ_FLAGS(34, KVM_REQUEST_WAIT | KVM_REQUEST_PER_PLANE)
#define CR0_RESERVED_BITS \
(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
--
2.53.0
^ permalink raw reply related
* [PATCH 36/60] kvm: Add plane_level to kvm_kernel_irq_routing_entry
From: Jörg Rödel @ 2026-06-08 14:42 UTC (permalink / raw)
To: Paolo Bonzini, Sean Christopherson
Cc: Tom Lendacky, ashish.kalra, michael.roth, nsaenz, anelkz,
James.Bottomley, Melody Wang, kvm, linux-kernel, kvmarm,
loongarch, linux-mips, linuxppc-dev, kvm-riscv, x86, coconut-svsm,
joerg.roedel
In-Reply-To: <20260608144252.351443-1-joro@8bytes.org>
From: Joerg Roedel <joerg.roedel@amd.com>
Track the target plane-level of MSI irqs in struct
kvm_kernel_irq_routing_entry. This will be used to send MSI IRQs to
the right plane-level once planes are supported.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
arch/arm64/kvm/vgic/vgic-irqfd.c | 1 +
arch/loongarch/kvm/irqfd.c | 1 +
arch/powerpc/kvm/mpic.c | 1 +
arch/riscv/kvm/vm.c | 1 +
arch/x86/kvm/irq.c | 1 +
include/linux/kvm_host.h | 1 +
virt/kvm/irqchip.c | 1 +
7 files changed, 7 insertions(+)
diff --git a/arch/arm64/kvm/vgic/vgic-irqfd.c b/arch/arm64/kvm/vgic/vgic-irqfd.c
index b9b86e3a6c86..479b896c8954 100644
--- a/arch/arm64/kvm/vgic/vgic-irqfd.c
+++ b/arch/arm64/kvm/vgic/vgic-irqfd.c
@@ -57,6 +57,7 @@ int kvm_set_routing_entry(struct kvm *kvm,
e->msi.data = ue->u.msi.data;
e->msi.flags = ue->flags;
e->msi.devid = ue->u.msi.devid;
+ e->msi.plane_level = 0;
break;
default:
goto out;
diff --git a/arch/loongarch/kvm/irqfd.c b/arch/loongarch/kvm/irqfd.c
index f4f953b22419..50f0c32df46c 100644
--- a/arch/loongarch/kvm/irqfd.c
+++ b/arch/loongarch/kvm/irqfd.c
@@ -60,6 +60,7 @@ int kvm_set_routing_entry(struct kvm *kvm,
e->msi.address_lo = ue->u.msi.address_lo;
e->msi.address_hi = ue->u.msi.address_hi;
e->msi.data = ue->u.msi.data;
+ e->msi.plane_level = 0;
return 0;
default:
return -EINVAL;
diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c
index 3070f36d9fb8..0f568f5fff8b 100644
--- a/arch/powerpc/kvm/mpic.c
+++ b/arch/powerpc/kvm/mpic.c
@@ -1841,6 +1841,7 @@ int kvm_set_routing_entry(struct kvm *kvm,
e->msi.address_lo = ue->u.msi.address_lo;
e->msi.address_hi = ue->u.msi.address_hi;
e->msi.data = ue->u.msi.data;
+ e->msi.plane_level = 0;
break;
default:
goto out;
diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c
index a9f083feeb76..f518247e699b 100644
--- a/arch/riscv/kvm/vm.c
+++ b/arch/riscv/kvm/vm.c
@@ -138,6 +138,7 @@ int kvm_set_routing_entry(struct kvm *kvm,
e->msi.data = ue->u.msi.data;
e->msi.flags = ue->flags;
e->msi.devid = ue->u.msi.devid;
+ e->msi.plane_level = 0;
break;
default:
goto out;
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index 9519fec09ee6..b7e08eddb765 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -332,6 +332,7 @@ int kvm_set_routing_entry(struct kvm *kvm,
e->msi.address_lo = ue->u.msi.address_lo;
e->msi.address_hi = ue->u.msi.address_hi;
e->msi.data = ue->u.msi.data;
+ e->msi.plane_level = 0;
if (kvm_msi_route_invalid(kvm, e))
return -EINVAL;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index e3611e6cc3e4..16dcca3132d3 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -782,6 +782,7 @@ struct kvm_kernel_irq_routing_entry {
u32 data;
u32 flags;
u32 devid;
+ unsigned plane_level;
} msi;
struct kvm_s390_adapter_int adapter;
struct kvm_hv_sint hv_sint;
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index 462c70621247..ae47e56176f1 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -57,6 +57,7 @@ int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi)
route.msi.data = msi->data;
route.msi.flags = msi->flags;
route.msi.devid = msi->devid;
+ route.msi.plane_level = 0;
return kvm_set_msi(&route, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, false);
}
--
2.53.0
^ permalink raw reply related
* [PATCH 40/60] kvm: x86: Handle IOAPIC EOIs per plane
From: Jörg Rödel @ 2026-06-08 14:42 UTC (permalink / raw)
To: Paolo Bonzini, Sean Christopherson
Cc: Tom Lendacky, ashish.kalra, michael.roth, nsaenz, anelkz,
James.Bottomley, Melody Wang, kvm, linux-kernel, kvmarm,
loongarch, linux-mips, linuxppc-dev, kvm-riscv, x86, coconut-svsm,
joerg.roedel
In-Reply-To: <20260608144252.351443-1-joro@8bytes.org>
From: Joerg Roedel <joerg.roedel@amd.com>
Make sure the handling of IOAPIC EOIs is aware of planes.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
arch/x86/include/asm/kvm_host.h | 2 +-
arch/x86/kvm/irq.c | 3 ++-
arch/x86/kvm/x86.c | 10 ++++++++--
3 files changed, 11 insertions(+), 4 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1393566741a0..134bc02962fd 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -109,7 +109,7 @@
#define KVM_REQ_APIC_PAGE_RELOAD \
KVM_ARCH_REQ_FLAGS(17, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_HV_CRASH KVM_ARCH_REQ(18)
-#define KVM_REQ_IOAPIC_EOI_EXIT KVM_ARCH_REQ(19)
+#define KVM_REQ_IOAPIC_EOI_EXIT KVM_ARCH_PLANE_REQ(19)
#define KVM_REQ_HV_RESET KVM_ARCH_REQ(20)
#define KVM_REQ_HV_EXIT KVM_ARCH_REQ(21)
#define KVM_REQ_HV_STIMER KVM_ARCH_REQ(22)
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index 90e2d2db2123..bc748a4b7cbd 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -400,7 +400,8 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
hlist_for_each_entry(entry, &table->map[i], link) {
struct kvm_lapic_irq irq;
- if (entry->type != KVM_IRQ_ROUTING_MSI)
+ if (entry->type != KVM_IRQ_ROUTING_MSI ||
+ entry->msi.plane_level != vcpu->plane_level)
continue;
kvm_msi_to_lapic_irq(vcpu->kvm, entry, &irq);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 623838885753..a158740a6fc1 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -11336,8 +11336,14 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
goto out;
}
}
- if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
- vcpu_scan_ioapic(vcpu);
+ if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu)) {
+ struct kvm_vcpu *v;
+ unsigned i;
+
+ vcpu_for_each_plane(vcpu->common, i, v) {
+ vcpu_scan_ioapic(v);
+ }
+ }
if (kvm_check_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu))
vcpu_load_eoi_exitmap(vcpu);
if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
--
2.53.0
^ permalink raw reply related
* [PATCH 35/60] kvm: Add VCPU plane-scheduling state and helpers
From: Jörg Rödel @ 2026-06-08 14:42 UTC (permalink / raw)
To: Paolo Bonzini, Sean Christopherson
Cc: Tom Lendacky, ashish.kalra, michael.roth, nsaenz, anelkz,
James.Bottomley, Melody Wang, kvm, linux-kernel, kvmarm,
loongarch, linux-mips, linuxppc-dev, kvm-riscv, x86, coconut-svsm,
joerg.roedel
In-Reply-To: <20260608144252.351443-1-joro@8bytes.org>
From: Joerg Roedel <joerg.roedel@amd.com>
The algorithm is to always run the lowest runnable plane. Plane
switches are done by stopping the current plane and setting another
runnable.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
include/linux/kvm_host.h | 16 ++++++++++++++
virt/kvm/kvm_main.c | 45 ++++++++++++++++++++++++++++++++++++++++
2 files changed, 61 insertions(+)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 5c3f9dfa15ea..e3611e6cc3e4 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -168,6 +168,7 @@ static inline bool kvm_is_error_gpa(gpa_t gpa)
#define KVM_REQ_VM_DEAD (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_UNBLOCK 2
#define KVM_REQ_DIRTY_RING_SOFT_FULL 3
+#define KVM_REQ_PLANE_RESCHED 4
#define KVM_REQUEST_ARCH_BASE 8
/*
@@ -324,6 +325,8 @@ struct kvm_mmio_fragment {
unsigned int len;
};
+
+
struct kvm_vcpu_common {
struct kvm *kvm;
@@ -381,6 +384,8 @@ struct kvm_vcpu_common {
struct kvm_dirty_ring dirty_ring;
+ bool plane_switch;
+
struct kvm_vcpu_arch_common arch;
};
@@ -388,6 +393,12 @@ struct kvm_vcpu_common {
for ((i) = 0; (i) < KVM_MAX_PLANES; ++(i)) \
if (((v) = common->vcpus[(i)]) != NULL)
+/* Tracked per plane-VCPU - used for deciding which plane-vcpu to run */
+enum kvm_vcpu_state {
+ STOPPED,
+ RUNNABLE,
+};
+
struct kvm_vcpu {
struct kvm *kvm;
struct kvm_plane *plane;
@@ -401,6 +412,7 @@ struct kvm_vcpu {
struct kvm_run *run;
u64 plane_requests;
+ enum kvm_vcpu_state plane_state;
/* S390 only */
bool valid_wakeup;
@@ -440,6 +452,10 @@ struct kvm_vcpu {
unsigned plane_level;
};
+void kvm_vcpu_set_plane_runnable(struct kvm_vcpu *vcpu);
+void kvm_vcpu_set_plane_stopped(struct kvm_vcpu *vcpu);
+struct kvm_vcpu *kvm_vcpu_select_plane(struct kvm_vcpu *vcpu);
+
static inline bool kvm_vcpu_wants_to_run(struct kvm_vcpu *vcpu)
{
return vcpu->common->wants_to_run;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 9d30fd85ce5f..a30123b77112 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -4397,6 +4397,7 @@ static int kvm_plane_ioctl_create_vcpu(struct kvm_plane *plane, unsigned long id
vcpu->vcpu_idx = vcpu->common->vcpu_idx;
vcpu->plane = plane;
vcpu->plane_level = plane->level;
+ vcpu->plane_state = STOPPED;
vcpu->run = vcpu->common->run;
kvm_vcpu_init(vcpu, kvm, id);
@@ -4938,6 +4939,50 @@ static struct file_operations kvm_plane_fops = {
KVM_COMPAT(kvm_plane_ioctl),
};
+void kvm_vcpu_set_plane_runnable(struct kvm_vcpu *vcpu)
+{
+ vcpu->plane_state = RUNNABLE;
+ vcpu->common->plane_switch = true;
+ kvm_make_request(KVM_REQ_PLANE_RESCHED, vcpu);
+}
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_set_plane_runnable);
+
+void kvm_vcpu_set_plane_stopped(struct kvm_vcpu *vcpu)
+{
+ vcpu->plane_state = STOPPED;
+ kvm_make_request(KVM_REQ_PLANE_RESCHED, vcpu);
+}
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_set_plane_stopped);
+
+struct kvm_vcpu *kvm_vcpu_select_plane(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_common *common = vcpu->common;
+ struct kvm_vcpu *ret = NULL;
+ unsigned i;
+
+ for (i = 0; i < KVM_MAX_PLANES; i++) {
+ if (common->vcpus[i] == NULL)
+ continue;
+
+ if (common->vcpus[i]->plane_state == RUNNABLE) {
+ ret = common->vcpus[i];
+ break;
+ }
+ }
+
+ if (ret == NULL) {
+ ret = common->vcpus[0];
+ ret->plane_state = RUNNABLE;
+ }
+
+ common->current_vcpu = ret;
+
+ common->plane_switch = false;
+
+ return ret;
+}
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_select_plane);
+
static int kvm_device_mmap(struct file *filp, struct vm_area_struct *vma)
{
struct kvm_device *dev = filp->private_data;
--
2.53.0
^ permalink raw reply related
* [PATCH 19/60] kvm: Introduce accessors for kvm_vcpu->mode
From: Jörg Rödel @ 2026-06-08 14:42 UTC (permalink / raw)
To: Paolo Bonzini, Sean Christopherson
Cc: Tom Lendacky, ashish.kalra, michael.roth, nsaenz, anelkz,
James.Bottomley, Melody Wang, kvm, linux-kernel, kvmarm,
loongarch, linux-mips, linuxppc-dev, kvm-riscv, x86, coconut-svsm,
joerg.roedel
In-Reply-To: <20260608144252.351443-1-joro@8bytes.org>
From: Joerg Roedel <joerg.roedel@amd.com>
Introduce accessors to make it easier to move this member of struct
kvm_vcpu.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
arch/arm64/kvm/arm.c | 6 +++---
arch/loongarch/kvm/vcpu.c | 6 +++---
arch/mips/kvm/mips.c | 6 +++---
arch/powerpc/kvm/book3s_pr.c | 2 +-
arch/powerpc/kvm/booke.c | 2 +-
arch/powerpc/kvm/powerpc.c | 2 +-
arch/riscv/kvm/vcpu.c | 6 +++---
arch/x86/kvm/lapic.c | 3 ++-
arch/x86/kvm/mmu/mmu.c | 4 ++--
arch/x86/kvm/svm/svm.c | 2 +-
arch/x86/kvm/vmx/common.h | 2 +-
arch/x86/kvm/x86.c | 8 ++++----
include/linux/kvm_host.h | 25 +++++++++++++++++++++++++
virt/kvm/kvm_main.c | 4 ++--
14 files changed, 52 insertions(+), 26 deletions(-)
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 295d7f19e4de..001f83f737ea 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -1298,10 +1298,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
* See the comment in kvm_vcpu_exiting_guest_mode() and
* Documentation/virt/kvm/vcpu-requests.rst
*/
- smp_store_mb(vcpu->mode, IN_GUEST_MODE);
+ kvm_vcpu_set_mode_mb(vcpu, IN_GUEST_MODE);
if (ret <= 0 || kvm_vcpu_exit_request(vcpu, &ret)) {
- vcpu->mode = OUTSIDE_GUEST_MODE;
+ kvm_vcpu_set_mode(vcpu, OUTSIDE_GUEST_MODE);
isb(); /* Ensure work in x_flush_hwstate is committed */
if (kvm_vcpu_has_pmu(vcpu))
kvm_pmu_sync_hwstate(vcpu);
@@ -1323,7 +1323,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
ret = kvm_arm_vcpu_enter_exit(vcpu);
- vcpu->mode = OUTSIDE_GUEST_MODE;
+ kvm_vcpu_set_mode(vcpu, OUTSIDE_GUEST_MODE);
vcpu->stat.exits++;
/*
* Back from guest
diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
index bde8b68b8273..bab3c66ae58d 100644
--- a/arch/loongarch/kvm/vcpu.c
+++ b/arch/loongarch/kvm/vcpu.c
@@ -311,7 +311,7 @@ static int kvm_pre_enter_guest(struct kvm_vcpu *vcpu)
kvm_deliver_intr(vcpu);
kvm_deliver_exception(vcpu);
/* Make sure the vcpu mode has been written */
- smp_store_mb(vcpu->mode, IN_GUEST_MODE);
+ kvm_vcpu_set_mode_mb(vcpu, IN_GUEST_MODE);
kvm_check_vpid(vcpu);
/*
@@ -329,7 +329,7 @@ static int kvm_pre_enter_guest(struct kvm_vcpu *vcpu)
kvm_make_request(KVM_REQ_PMU, vcpu);
}
/* make sure the vcpu mode has been written */
- smp_store_mb(vcpu->mode, OUTSIDE_GUEST_MODE);
+ kvm_vcpu_set_mode_mb(vcpu, OUTSIDE_GUEST_MODE);
local_irq_enable();
ret = -EAGAIN;
}
@@ -348,7 +348,7 @@ static int kvm_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
u32 intr = estat & CSR_ESTAT_IS;
u32 ecode = (estat & CSR_ESTAT_EXC) >> CSR_ESTAT_EXC_SHIFT;
- vcpu->mode = OUTSIDE_GUEST_MODE;
+ kvm_vcpu_set_mode(vcpu, OUTSIDE_GUEST_MODE);
/* Set a default exit reason */
run->exit_reason = KVM_EXIT_UNKNOWN;
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 6469ec246dd6..776aba0af096 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -448,7 +448,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
* flush request while the requester sees the VCPU as outside of guest
* mode and not needing an IPI.
*/
- smp_store_mb(vcpu->mode, IN_GUEST_MODE);
+ kvm_vcpu_set_mode_mb(vcpu, IN_GUEST_MODE);
r = kvm_mips_vcpu_enter_exit(vcpu);
@@ -1175,7 +1175,7 @@ static int __kvm_mips_handle_exit(struct kvm_vcpu *vcpu)
u32 inst;
int ret = RESUME_GUEST;
- vcpu->mode = OUTSIDE_GUEST_MODE;
+ kvm_vcpu_set_mode(vcpu, OUTSIDE_GUEST_MODE);
/* Set a default exit reason */
run->exit_reason = KVM_EXIT_UNKNOWN;
@@ -1329,7 +1329,7 @@ static int __kvm_mips_handle_exit(struct kvm_vcpu *vcpu)
* or we could miss a TLB flush request while the requester sees
* the VCPU as outside of guest mode and not needing an IPI.
*/
- smp_store_mb(vcpu->mode, IN_GUEST_MODE);
+ kvm_vcpu_set_mode_mb(vcpu, IN_GUEST_MODE);
kvm_mips_callbacks->vcpu_reenter(vcpu);
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 2ba2dd26a7ea..0a14870f1d33 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -1852,7 +1852,7 @@ static int kvmppc_vcpu_run_pr(struct kvm_vcpu *vcpu)
srr_regs_clobbered();
out:
- vcpu->mode = OUTSIDE_GUEST_MODE;
+ kvm_vcpu_set_mode(vcpu, OUTSIDE_GUEST_MODE);
return ret;
}
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index f3ddb24ece74..08b3180adc83 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -823,7 +823,7 @@ int kvmppc_vcpu_run(struct kvm_vcpu *vcpu)
#endif
out:
- vcpu->mode = OUTSIDE_GUEST_MODE;
+ kvm_vcpu_set_mode(vcpu, OUTSIDE_GUEST_MODE);
return ret;
}
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 800867c164c6..5d94e0f676ec 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -98,7 +98,7 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
break;
}
- vcpu->mode = IN_GUEST_MODE;
+ kvm_vcpu_set_mode(vcpu, IN_GUEST_MODE);
/*
* Reading vcpu->requests must happen after setting vcpu->mode,
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index 8519a5bfbdc4..66cde226eb87 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -903,7 +903,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
* See the comment in kvm_vcpu_exiting_guest_mode() and
* Documentation/virt/kvm/vcpu-requests.rst
*/
- vcpu->mode = IN_GUEST_MODE;
+ kvm_vcpu_set_mode(vcpu, IN_GUEST_MODE);
kvm_vcpu_srcu_read_unlock(vcpu);
smp_mb__after_srcu_read_unlock();
@@ -920,7 +920,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
if (kvm_riscv_gstage_vmid_ver_changed(&vcpu->kvm->arch.vmid) ||
kvm_request_pending(vcpu) ||
xfer_to_guest_mode_work_pending()) {
- vcpu->mode = OUTSIDE_GUEST_MODE;
+ kvm_vcpu_set_mode(vcpu, OUTSIDE_GUEST_MODE);
local_irq_enable();
preempt_enable();
kvm_vcpu_srcu_read_lock(vcpu);
@@ -941,7 +941,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
kvm_riscv_vcpu_enter_exit(vcpu, &trap);
- vcpu->mode = OUTSIDE_GUEST_MODE;
+ kvm_vcpu_set_mode(vcpu, OUTSIDE_GUEST_MODE);
vcpu->stat.exits++;
/* Syncup interrupts state with HW */
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index ab40a2e4ab9d..1b763f164951 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -200,7 +200,8 @@ static bool kvm_can_use_hv_timer(struct kvm_vcpu *vcpu)
static bool kvm_use_posted_timer_interrupt(struct kvm_vcpu *vcpu)
{
- return kvm_can_post_timer_interrupt(vcpu) && vcpu->mode == IN_GUEST_MODE;
+ return kvm_can_post_timer_interrupt(vcpu) &&
+ kvm_vcpu_mode(vcpu) == IN_GUEST_MODE;
}
static inline u32 kvm_apic_calc_x2apic_ldr(u32 id)
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index f0144ae8d891..0cec559f59b1 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -574,7 +574,7 @@ static void walk_shadow_page_lockless_begin(struct kvm_vcpu *vcpu)
* Make sure a following spte read is not reordered ahead of the write
* to vcpu->mode.
*/
- smp_store_mb(vcpu->mode, READING_SHADOW_PAGE_TABLES);
+ kvm_vcpu_set_mode_mb(vcpu, READING_SHADOW_PAGE_TABLES);
}
}
@@ -588,7 +588,7 @@ static void walk_shadow_page_lockless_end(struct kvm_vcpu *vcpu)
* reads to sptes. If it does, kvm_mmu_commit_zap_page() can see us
* OUTSIDE_GUEST_MODE and proceed to free the shadow page table.
*/
- smp_store_release(&vcpu->mode, OUTSIDE_GUEST_MODE);
+ kvm_vcpu_set_mode_release(vcpu, OUTSIDE_GUEST_MODE);
local_irq_enable();
}
}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index f5cc30a6732f..e8ad880a4266 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3870,7 +3870,7 @@ void svm_complete_interrupt_delivery(struct kvm_vcpu *vcpu, int delivery_mode,
* apic->apicv_active must be read after vcpu->mode.
* Pairs with smp_store_release in vcpu_enter_guest.
*/
- bool in_guest_mode = (smp_load_acquire(&vcpu->mode) == IN_GUEST_MODE);
+ bool in_guest_mode = (kvm_vcpu_mode_acquire(vcpu) == IN_GUEST_MODE);
/* Note, this is called iff the local APIC is in-kernel. */
if (!READ_ONCE(vcpu->arch.apic->apicv_active)) {
diff --git a/arch/x86/kvm/vmx/common.h b/arch/x86/kvm/vmx/common.h
index 412d0829d7a2..fe480f7cf55e 100644
--- a/arch/x86/kvm/vmx/common.h
+++ b/arch/x86/kvm/vmx/common.h
@@ -112,7 +112,7 @@ static inline void kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu,
int pi_vec)
{
#ifdef CONFIG_SMP
- if (vcpu->mode == IN_GUEST_MODE) {
+ if (kvm_vcpu_mode(vcpu) == IN_GUEST_MODE) {
/*
* The vector of the virtual has already been set in the PIR.
* Send a notification event to deliver the virtual interrupt
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2a87359cf42f..50601ac2828f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2272,7 +2272,7 @@ static inline bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu)
{
xfer_to_guest_mode_prepare();
- return READ_ONCE(vcpu->mode) == EXITING_GUEST_MODE ||
+ return kvm_vcpu_mode(vcpu) == EXITING_GUEST_MODE ||
kvm_request_pending(vcpu) || xfer_to_guest_mode_work_pending();
}
@@ -11391,7 +11391,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
local_irq_disable();
/* Store vcpu->apicv_active before vcpu->mode. */
- smp_store_release(&vcpu->mode, IN_GUEST_MODE);
+ kvm_vcpu_set_mode_release(vcpu, IN_GUEST_MODE);
kvm_vcpu_srcu_read_unlock(vcpu);
@@ -11420,7 +11420,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_x86_call(sync_pir_to_irr)(vcpu);
if (kvm_vcpu_exit_request(vcpu)) {
- vcpu->mode = OUTSIDE_GUEST_MODE;
+ kvm_vcpu_set_mode(vcpu, OUTSIDE_GUEST_MODE);
smp_wmb();
local_irq_enable();
preempt_enable();
@@ -11539,7 +11539,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
vcpu->arch.last_vmentry_cpu = vcpu->cpu;
vcpu->arch.last_guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
- vcpu->mode = OUTSIDE_GUEST_MODE;
+ kvm_vcpu_set_mode(vcpu, OUTSIDE_GUEST_MODE);
smp_wmb();
kvm_load_xfeatures(vcpu, false);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index c08ede1cefd2..45286b3b35c9 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -440,6 +440,31 @@ static inline bool kvm_vcpu_scheduled_out(struct kvm_vcpu *vcpu)
return vcpu->common->scheduled_out;
}
+static inline int kvm_vcpu_mode(struct kvm_vcpu *vcpu)
+{
+ return vcpu->mode;
+}
+
+static inline int kvm_vcpu_mode_acquire(struct kvm_vcpu *vcpu)
+{
+ return smp_load_acquire(&vcpu->mode);
+}
+
+static inline void kvm_vcpu_set_mode(struct kvm_vcpu *vcpu, int mode)
+{
+ vcpu->mode = mode;
+}
+
+static inline void kvm_vcpu_set_mode_mb(struct kvm_vcpu *vcpu, int mode)
+{
+ smp_store_mb(vcpu->mode, mode);
+}
+
+static inline void kvm_vcpu_set_mode_release(struct kvm_vcpu *vcpu, int mode)
+{
+ smp_store_release(&vcpu->mode, mode);
+}
+
/*
* Start accounting time towards a guest.
* Must be called before entering guest context.
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 11e0d4af82df..7ea20d96bc89 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3960,8 +3960,8 @@ void __kvm_vcpu_kick(struct kvm_vcpu *vcpu, bool wait)
* within the vCPU thread itself.
*/
if (vcpu == kvm_get_running_vcpu()) {
- if (vcpu->mode == IN_GUEST_MODE)
- WRITE_ONCE(vcpu->mode, EXITING_GUEST_MODE);
+ if (kvm_vcpu_mode(vcpu) == IN_GUEST_MODE)
+ kvm_vcpu_set_mode(vcpu, EXITING_GUEST_MODE);
goto out;
}
--
2.53.0
^ permalink raw reply related
* [PATCH 49/60] kvm: x86: Allow hardware backend to override struct kvm_plane allocation
From: Jörg Rödel @ 2026-06-08 14:42 UTC (permalink / raw)
To: Paolo Bonzini, Sean Christopherson
Cc: Tom Lendacky, ashish.kalra, michael.roth, nsaenz, anelkz,
James.Bottomley, Melody Wang, kvm, linux-kernel, kvmarm,
loongarch, linux-mips, linuxppc-dev, kvm-riscv, x86, coconut-svsm,
joerg.roedel
In-Reply-To: <20260608144252.351443-1-joro@8bytes.org>
From: Joerg Roedel <joerg.roedel@amd.com>
Allow the hardware backend implementations to allocate the struct
kvm_plane instances so that they can carry hardware-specific
information along with them.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
arch/x86/include/asm/kvm-x86-ops.h | 2 ++
arch/x86/include/asm/kvm_host.h | 3 +++
arch/x86/kvm/svm/svm.c | 3 +++
arch/x86/kvm/vmx/main.c | 5 ++++-
arch/x86/kvm/x86.c | 16 ++++++++++++++--
arch/x86/kvm/x86.h | 4 ++++
6 files changed, 30 insertions(+), 3 deletions(-)
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index c8bff1e9325e..207d56d12459 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -150,6 +150,8 @@ KVM_X86_OP_OPTIONAL(alloc_apic_backing_page)
KVM_X86_OP_OPTIONAL_RET0(gmem_prepare)
KVM_X86_OP_OPTIONAL_RET0(gmem_max_mapping_level)
KVM_X86_OP_OPTIONAL(gmem_invalidate)
+KVM_X86_OP(alloc_plane)
+KVM_X86_OP(free_plane)
#endif
#undef KVM_X86_OP
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c2651774d785..0955097aca9c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -2011,6 +2011,9 @@ struct kvm_x86_ops {
int (*gmem_prepare)(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, int max_order);
void (*gmem_invalidate)(kvm_pfn_t start, kvm_pfn_t end);
int (*gmem_max_mapping_level)(struct kvm *kvm, kvm_pfn_t pfn, bool is_private);
+
+ struct kvm_plane *(*alloc_plane)(void);
+ void (*free_plane)(struct kvm_plane *);
};
struct kvm_x86_nested_ops {
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 0b57dde29e40..2a92d8d18d7c 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -5445,6 +5445,9 @@ struct kvm_x86_ops svm_x86_ops __initdata = {
.gmem_prepare = sev_gmem_prepare,
.gmem_invalidate = sev_gmem_invalidate,
.gmem_max_mapping_level = sev_gmem_max_mapping_level,
+
+ .alloc_plane = x86_alloc_plane,
+ .free_plane = x86_free_plane,
};
/*
diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index f9c4703dda54..a2fc4eeeca1d 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -1030,7 +1030,10 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
.vcpu_mem_enc_ioctl = vt_op_tdx_only(vcpu_mem_enc_ioctl),
.vcpu_mem_enc_unlocked_ioctl = vt_op_tdx_only(vcpu_mem_enc_unlocked_ioctl),
- .gmem_max_mapping_level = vt_op_tdx_only(gmem_max_mapping_level)
+ .gmem_max_mapping_level = vt_op_tdx_only(gmem_max_mapping_level),
+
+ .alloc_plane = x86_alloc_plane,
+ .free_plane = x86_free_plane,
};
struct kvm_x86_init_ops vt_init_ops __initdata = {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b9828cd31136..5f48392d4738 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -487,18 +487,30 @@ unsigned kvm_arch_max_planes(struct kvm *kvm)
return 1;
}
-struct kvm_plane *kvm_alloc_plane(void)
+struct kvm_plane *x86_alloc_plane(void)
{
/* For better type checking, do not return kzalloc() value directly */
struct kvm_plane *plane = kzalloc(sizeof(*plane), GFP_KERNEL_ACCOUNT);
return plane;
}
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(x86_alloc_plane);
-void kvm_free_plane(struct kvm_plane *plane)
+void x86_free_plane(struct kvm_plane *plane)
{
kfree(plane);
}
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(x86_free_plane);
+
+struct kvm_plane *kvm_alloc_plane(void)
+{
+ return kvm_x86_call(alloc_plane)();
+}
+
+void kvm_free_plane(struct kvm_plane *plane)
+{
+ kvm_x86_call(free_plane)(plane);
+}
/*
* All feature MSRs except uCode revID, which tracks the currently loaded uCode
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 38a905fa86de..812bd6004a4c 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -797,4 +797,8 @@ static inline bool kvm_is_valid_u_s_cet(struct kvm_vcpu *vcpu, u64 data)
return true;
}
+
+struct kvm_plane *x86_alloc_plane(void);
+void x86_free_plane(struct kvm_plane *plane);
+
#endif
--
2.53.0
^ permalink raw reply related
* [PATCH 56/60] kvm: svm: Implement GET_AP_APIC_IDS NAE event
From: Jörg Rödel @ 2026-06-08 14:42 UTC (permalink / raw)
To: Paolo Bonzini, Sean Christopherson
Cc: Tom Lendacky, ashish.kalra, michael.roth, nsaenz, anelkz,
James.Bottomley, Melody Wang, kvm, linux-kernel, kvmarm,
loongarch, linux-mips, linuxppc-dev, kvm-riscv, x86, coconut-svsm,
joerg.roedel, Carlos López
In-Reply-To: <20260608144252.351443-1-joro@8bytes.org>
From: Tom Lendacky <thomas.lendacky@amd.com>
Implement the GET_APIC_IDS NAE event to gather and return the list of
APIC IDs for all vCPUs in the guest.
Since it is now possible to launch vCPUs without going through the
LAUNCH_UPDATE process, be sure to mark the guest state protected and to
enable LBR virtualization.
Since it is now possible to launch vCPUs by APIC ID before the first
INIT-SIPI request, be sure to check for the AP create event in the
kvm_arch_vcpu_ioctl_run() loop when the AP is in the uninitialized state.
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Co-developed-by: Joerg Roedel <joerg.roedel@amd.com>
Co-developed-by: Carlos López <clopez@suse.de>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
arch/x86/include/asm/sev-common.h | 1 +
arch/x86/include/uapi/asm/svm.h | 1 +
arch/x86/kvm/svm/sev.c | 87 +++++++++++++++++++++++++++++--
3 files changed, 86 insertions(+), 3 deletions(-)
diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h
index ee17a3541b55..cedb7ea91da5 100644
--- a/arch/x86/include/asm/sev-common.h
+++ b/arch/x86/include/asm/sev-common.h
@@ -137,6 +137,7 @@ enum psc_op {
#define GHCB_HV_FT_SNP BIT_ULL(0)
#define GHCB_HV_FT_SNP_AP_CREATION BIT_ULL(1)
#define GHCB_HV_FT_SNP_RINJ (BIT_ULL(2) | GHCB_HV_FT_SNP_AP_CREATION)
+#define GHCB_HV_FT_APIC_ID_LIST BIT_ULL(4)
#define GHCB_HV_FT_SNP_MULTI_VMPL BIT_ULL(5)
/*
diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h
index d281dd21c540..91395b82eadd 100644
--- a/arch/x86/include/uapi/asm/svm.h
+++ b/arch/x86/include/uapi/asm/svm.h
@@ -123,6 +123,7 @@
#define SVM_VMGEXIT_HVDB_QUERY 2
#define SVM_VMGEXIT_HVDB_CLEAR 3
#define SVM_VMGEXIT_HV_IPI 0x80000015ull
+#define SVM_VMGEXIT_GET_APIC_IDS 0x80000017ull
#define SVM_VMGEXIT_SNP_RUN_VMPL 0x80000018ull
#define SVM_VMGEXIT_SAVIC 0x8000001aull
#define SVM_VMGEXIT_SAVIC_REGISTER_GPA 0
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 12b039823c1c..c0b2879f8e9f 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -40,9 +40,10 @@
#define GHCB_VERSION_MAX 2ULL
#define GHCB_VERSION_MIN 1ULL
-#define GHCB_HV_FT_SUPPORTED (GHCB_HV_FT_SNP | \
- GHCB_HV_FT_SNP_AP_CREATION | \
- GHCB_HV_FT_SNP_RINJ)
+#define GHCB_HV_FT_SUPPORTED (GHCB_HV_FT_SNP | \
+ GHCB_HV_FT_SNP_AP_CREATION | \
+ GHCB_HV_FT_SNP_RINJ | \
+ GHCB_HV_FT_APIC_ID_LIST)
/*
* The GHCB spec essentially states that all non-zero error codes other than
@@ -3518,6 +3519,10 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
if (!kvm_ghcb_rax_is_valid(svm))
goto vmgexit_err;
break;
+ case SVM_VMGEXIT_GET_APIC_IDS:
+ if (!kvm_ghcb_rax_is_valid(svm))
+ goto vmgexit_err;
+ break;
case SVM_VMGEXIT_NMI_COMPLETE:
case SVM_VMGEXIT_AP_HLT_LOOP:
case SVM_VMGEXIT_AP_JUMP_TABLE:
@@ -4439,6 +4444,78 @@ static int sev_snp_hv_ipi(struct vcpu_svm *svm)
return 0;
}
+struct sev_apic_id_desc {
+ u32 num_entries;
+ u32 apic_ids[];
+};
+
+static void sev_get_apic_ids(struct vcpu_svm *svm)
+{
+ struct ghcb *ghcb = svm->sev_es.ghcb;
+ struct kvm_vcpu *vcpu = &svm->vcpu, *loop_vcpu;
+ struct kvm *kvm = vcpu->kvm;
+ unsigned int id_desc_size;
+ struct sev_apic_id_desc *desc;
+ struct page *page;
+ gpa_t gpa;
+ u64 pages;
+ unsigned long i;
+ int n;
+
+ pages = vcpu->arch.regs[VCPU_REGS_RAX];
+
+ /* Each APIC ID is 32-bits in size, so make sure there is room */
+ n = atomic_read(&kvm->online_vcpus);
+ /*TODO: is this possible? */
+ if (n < 0)
+ return;
+
+ id_desc_size = sizeof(*desc);
+ id_desc_size += n * sizeof(desc->apic_ids[0]);
+ if (id_desc_size > (pages * PAGE_SIZE)) {
+ vcpu->arch.regs[VCPU_REGS_RAX] = PFN_UP(id_desc_size);
+ return;
+ }
+
+ gpa = svm->vmcb->control.exit_info_1;
+
+ ghcb_set_sw_exit_info_1(ghcb, 2);
+ ghcb_set_sw_exit_info_2(ghcb, 5);
+
+ if (!page_address_valid(vcpu, gpa))
+ return;
+
+ page = gfn_to_page(kvm, gpa_to_gfn(gpa));
+ kvm_release_page_unused(page);
+ if (!page)
+ return;
+
+ if (!pages)
+ return;
+
+ /* Allocate a buffer to hold the APIC IDs */
+ desc = kvzalloc(id_desc_size, GFP_KERNEL_ACCOUNT);
+ if (!desc)
+ return;
+
+ desc->num_entries = n;
+ kvm_for_each_vcpu(i, loop_vcpu, kvm) {
+ /*TODO: is this possible? */
+ if (i >= n)
+ break;
+
+ desc->apic_ids[i] = loop_vcpu->vcpu_id;
+ }
+
+ if (!kvm_write_guest(kvm, gpa, desc, id_desc_size)) {
+ /* IDs were successfully written */
+ ghcb_set_sw_exit_info_1(ghcb, 0);
+ ghcb_set_sw_exit_info_2(ghcb, 0);
+ }
+
+ kvfree(desc);
+}
+
static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
{
struct vmcb_control_area *control = &svm->vmcb->control;
@@ -4730,6 +4807,10 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
}
ret = 1;
break;
+ case SVM_VMGEXIT_GET_APIC_IDS:
+ sev_get_apic_ids(svm);
+ ret = 1;
+ break;
case SVM_VMGEXIT_UNSUPPORTED_EVENT:
vcpu_unimpl(vcpu,
"vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n",
--
2.53.0
^ permalink raw reply related
* [PATCH 52/60] kvm: x86: Switch to plane0 if it has events
From: Jörg Rödel @ 2026-06-08 14:42 UTC (permalink / raw)
To: Paolo Bonzini, Sean Christopherson
Cc: Tom Lendacky, ashish.kalra, michael.roth, nsaenz, anelkz,
James.Bottomley, Melody Wang, kvm, linux-kernel, kvmarm,
loongarch, linux-mips, linuxppc-dev, kvm-riscv, x86, coconut-svsm,
joerg.roedel
In-Reply-To: <20260608144252.351443-1-joro@8bytes.org>
From: Joerg Roedel <joerg.roedel@amd.com>
When there are IRQs or events pending for plane0, make sure it can
handle them.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
arch/x86/kvm/x86.c | 34 +++++++++++++++++++++++++++++-----
1 file changed, 29 insertions(+), 5 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 08fe65b8d57d..60b34bd4da9d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10962,6 +10962,20 @@ static int kvm_check_and_inject_events(struct kvm_vcpu *vcpu,
return r;
}
+static inline bool kvm_check_plane0_events(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu *vcpu_plane0;
+
+ if (vcpu->plane_level == 0)
+ return false;
+
+ vcpu_plane0 = vcpu->common->vcpus[0];
+
+ return kvm_cpu_has_injectable_intr(vcpu_plane0) ||
+ vcpu_plane0->arch.nmi_pending ||
+ vcpu_plane0->arch.smi_pending;
+}
+
static void process_nmi(struct kvm_vcpu *vcpu)
{
unsigned int limit;
@@ -11410,12 +11424,19 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
goto out;
}
}
+ }
- if (kvm_check_request(KVM_REQ_PLANE_RESCHED, vcpu)) {
- vcpu->common->plane_switch = true;
- r = 0;
- goto out;
- }
+ if (kvm_check_plane0_events(vcpu)) {
+ kvm_vcpu_set_plane_runnable(vcpu->common->vcpus[0]);
+
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+ kvm_make_request(KVM_REQ_PLANE_RESCHED, vcpu);
+ }
+
+ if (kvm_check_request(KVM_REQ_PLANE_RESCHED, vcpu)) {
+ vcpu->common->plane_switch = true;
+ r = 0;
+ goto out;
}
if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win ||
@@ -11737,6 +11758,9 @@ bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
if (kvm_test_request(KVM_REQ_UPDATE_PROTECTED_GUEST_STATE, vcpu))
return true;
+ if (kvm_test_request(KVM_REQ_PLANE_RESCHED, vcpu))
+ return true;
+
if (kvm_arch_interrupt_allowed(vcpu) && kvm_cpu_has_interrupt(vcpu))
return true;
--
2.53.0
^ permalink raw reply related
* [PATCH 26/60] kvm: Introduce arch-specific plane state
From: Jörg Rödel @ 2026-06-08 14:42 UTC (permalink / raw)
To: Paolo Bonzini, Sean Christopherson
Cc: Tom Lendacky, ashish.kalra, michael.roth, nsaenz, anelkz,
James.Bottomley, Melody Wang, kvm, linux-kernel, kvmarm,
loongarch, linux-mips, linuxppc-dev, kvm-riscv, x86, coconut-svsm,
joerg.roedel
In-Reply-To: <20260608144252.351443-1-joro@8bytes.org>
From: Joerg Roedel <joerg.roedel@amd.com>
Introduce struct kvm_arch_plane which is per architecture and will be
used to store architecture-specific per-plane state.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
arch/arm64/include/asm/kvm_host.h | 12 ++++++++++++
arch/loongarch/include/asm/kvm_host.h | 12 ++++++++++++
arch/mips/include/asm/kvm_host.h | 12 ++++++++++++
arch/powerpc/include/asm/kvm_host.h | 12 ++++++++++++
arch/riscv/include/asm/kvm_host.h | 12 ++++++++++++
arch/s390/include/asm/kvm_host.h | 12 ++++++++++++
arch/x86/include/asm/kvm_host.h | 12 ++++++++++++
include/linux/kvm_host.h | 2 ++
include/linux/kvm_types.h | 1 +
virt/kvm/kvm_main.c | 9 +++++++++
10 files changed, 96 insertions(+)
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 32dc484781f0..e9cca2adb371 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -306,6 +306,18 @@ enum fgt_group_id {
__NR_FGT_GROUP_IDS__
};
+/* Per-plane state of VM */
+struct kvm_arch_plane {};
+
+static inline int kvm_arch_plane_init(struct kvm *kvm,
+ struct kvm_plane *plane,
+ unsigned plane_level)
+{
+ return 0;
+}
+
+static inline void kvm_arch_plane_destroy(struct kvm_plane *plane) {}
+
struct kvm_arch {
struct kvm_s2_mmu mmu;
diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
index 776bc487a705..225aa87ebbdd 100644
--- a/arch/loongarch/include/asm/kvm_host.h
+++ b/arch/loongarch/include/asm/kvm_host.h
@@ -116,6 +116,18 @@ struct kvm_phyid_map {
struct kvm_phyid_info phys_map[KVM_MAX_PHYID];
};
+/* Per-plane state of VM */
+struct kvm_arch_plane {};
+
+static inline int kvm_arch_plane_init(struct kvm *kvm,
+ struct kvm_plane *plane,
+ unsigned plane_level)
+{
+ return 0;
+}
+
+static inline void kvm_arch_plane_destroy(struct kvm_plane *plane) {}
+
struct kvm_arch {
/* Guest physical mm */
kvm_pte_t *pgd;
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index c14b10821817..b01911eb9064 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -172,6 +172,18 @@ struct loongson_kvm_ipi {
};
#endif
+/* Per-plane state of VM */
+struct kvm_arch_plane {};
+
+static inline int kvm_arch_plane_init(struct kvm *kvm,
+ struct kvm_plane *plane,
+ unsigned plane_level)
+{
+ return 0;
+}
+
+static inline void kvm_arch_plane_destroy(struct kvm_plane *plane) {}
+
struct kvm_arch {
/* Guest physical mm */
struct mm_struct gpa_mm;
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 2d139c807577..c5b9fbaf34f3 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -269,6 +269,18 @@ struct kvm_hpt_info {
struct kvm_resize_hpt;
+/* Per-plane state of VM */
+struct kvm_arch_plane {};
+
+static inline int kvm_arch_plane_init(struct kvm *kvm,
+ struct kvm_plane *plane,
+ unsigned plane_level)
+{
+ return 0;
+}
+
+static inline void kvm_arch_plane_destroy(struct kvm_plane *plane) {}
+
/* Flag values for kvm_arch.secure_guest */
#define KVMPPC_SECURE_INIT_START 0x1 /* H_SVM_INIT_START has been called */
#define KVMPPC_SECURE_INIT_DONE 0x2 /* H_SVM_INIT_DONE completed */
diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h
index 75b0a951c1bc..bcbf487d4cb7 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -76,6 +76,18 @@ struct kvm_vcpu_stat {
struct kvm_arch_memory_slot {
};
+/* Per-plane state of VM */
+struct kvm_arch_plane {};
+
+static inline int kvm_arch_plane_init(struct kvm *kvm,
+ struct kvm_plane *plane,
+ unsigned plane_level)
+{
+ return 0;
+}
+
+static inline void kvm_arch_plane_destroy(struct kvm_plane *plane) {}
+
struct kvm_arch {
/* G-stage vmid */
struct kvm_vmid vmid;
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 8a4f4a39f7a2..bb3bfbfd35d8 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -638,6 +638,18 @@ struct kvm_s390_pv {
struct kvm_s390_mmu_cache;
+/* Per-plane state of VM */
+struct kvm_arch_plane {};
+
+static inline int kvm_arch_plane_init(struct kvm *kvm,
+ struct kvm_plane *plane,
+ unsigned plane_level)
+{
+ return 0;
+}
+
+static inline void kvm_arch_plane_destroy(struct kvm_plane *plane) {}
+
struct kvm_arch {
struct esca_block *sca;
debug_info_t *dbf;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 43c92f0ada1e..dd95c70bfdba 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1412,6 +1412,18 @@ enum kvm_mmu_type {
KVM_NR_MMU_TYPES,
};
+/* Per-plane state of VM */
+struct kvm_arch_plane {};
+
+static inline int kvm_arch_plane_init(struct kvm *kvm,
+ struct kvm_plane *plane,
+ unsigned plane_level)
+{
+ return 0;
+}
+
+static inline void kvm_arch_plane_destroy(struct kvm_plane *plane) {}
+
struct kvm_arch {
unsigned long n_used_mmu_pages;
unsigned long n_requested_mmu_pages;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 7d06459a06f3..4a0eaa1de479 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -847,6 +847,8 @@ struct kvm_plane {
/* Per-Plane VCPU array */
struct xarray vcpu_array;
+
+ struct kvm_arch_plane arch;
};
struct kvm {
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index a568d8e6f4e8..07e82928c948 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -42,6 +42,7 @@ struct kvm_interrupt;
struct kvm_irq_routing_table;
struct kvm_memory_slot;
struct kvm_one_reg;
+struct kvm_plane;
struct kvm_run;
struct kvm_userspace_memory_region;
struct kvm_vcpu;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index f85ddb0fc781..91fb9abf9b31 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1211,13 +1211,22 @@ static struct kvm_plane *kvm_create_plane(struct kvm *kvm, unsigned plane_level)
xa_init(&plane->vcpu_array);
+ if (kvm_arch_plane_init(kvm, plane, plane_level))
+ goto out_free_plane;
+
kvm->planes[plane_level] = plane;
return plane;
+
+out_free_plane:
+ kfree(plane);
+
+ return NULL;
}
static void kvm_destroy_one_plane(struct kvm_plane *plane)
{
+ kvm_arch_plane_destroy(plane);
kfree(plane);
}
--
2.53.0
^ permalink raw reply related
* [PATCH 05/60] KVM: SVM: Inject MCEs when Restricted Injection is active
From: Jörg Rödel @ 2026-06-08 14:41 UTC (permalink / raw)
To: Paolo Bonzini, Sean Christopherson
Cc: Tom Lendacky, ashish.kalra, michael.roth, nsaenz, anelkz,
James.Bottomley, Melody Wang, kvm, linux-kernel, kvmarm,
loongarch, linux-mips, linuxppc-dev, kvm-riscv, x86, coconut-svsm,
joerg.roedel
In-Reply-To: <20260608144252.351443-1-joro@8bytes.org>
From: Melody Wang <huibo.wang@amd.com>
When Restricted Injection is active, only #HV exceptions can be injected
into the SEV-SNP guest.
Detect that, and then follow the #HV doorbell communication from the
GHCB specification to inject the MCEs.
Co-developed-by: Thomas Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Thomas Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Melody Wang <huibo.wang@amd.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
arch/x86/include/asm/kvm-x86-ops.h | 1 +
arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/kvm/svm/sev.c | 16 ++++++++++++++--
arch/x86/kvm/svm/svm.c | 17 +++++++++++++++++
arch/x86/kvm/svm/svm.h | 2 ++
arch/x86/kvm/vmx/main.c | 10 ++++++++++
arch/x86/kvm/vmx/vmx.c | 5 +++++
arch/x86/kvm/vmx/x86_ops.h | 1 +
arch/x86/kvm/x86.c | 7 +++++++
9 files changed, 58 insertions(+), 2 deletions(-)
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 3776cf5382a2..c8bff1e9325e 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -79,6 +79,7 @@ KVM_X86_OP(inject_exception)
KVM_X86_OP(cancel_injection)
KVM_X86_OP(interrupt_allowed)
KVM_X86_OP(nmi_allowed)
+KVM_X86_OP_OPTIONAL(mce_allowed)
KVM_X86_OP(get_nmi_mask)
KVM_X86_OP(set_nmi_mask)
KVM_X86_OP(enable_nmi_window)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f14009f25a3b..43c92f0ada1e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1861,6 +1861,7 @@ struct kvm_x86_ops {
void (*cancel_injection)(struct kvm_vcpu *vcpu);
int (*interrupt_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
int (*nmi_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
+ int (*mce_allowed)(struct kvm_vcpu *vcpu);
bool (*get_nmi_mask)(struct kvm_vcpu *vcpu);
void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked);
/* Whether or not a virtual NMI is pending in hardware. */
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index b48745fad8c5..6d5d66563b0d 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -5430,6 +5430,8 @@ static void __sev_snp_inject(enum inject_type type, struct kvm_vcpu *vcpu)
if (type == INJECT_NMI)
hvdb->events.nmi = 1;
+ else if (type == INJECT_MCE)
+ hvdb->events.mce = 1;
else
hvdb->events.vector = vcpu->arch.interrupt.nr;
@@ -5445,6 +5447,11 @@ bool sev_snp_queue_exception(struct kvm_vcpu *vcpu)
if (!sev_snp_is_rinj_active(vcpu))
return false;
+ if (vcpu->arch.exception.vector == MC_VECTOR) {
+ __sev_snp_inject(INJECT_MCE, vcpu);
+ return true;
+ }
+
/*
* Restricted Injection is enabled, only #HV is supported.
* If the vector is not HV_VECTOR, do not inject the exception,
@@ -5513,7 +5520,7 @@ void sev_snp_cancel_injection(struct kvm_vcpu *vcpu)
/*
* KVM only injects a single event each time (prepare_hv_injection),
- * so when events.nmi is true, the vector will be zero
+ * so when events.nmi is true, the MCE and vector will be zero.
*/
if (hvdb->events.vector)
svm->vmcb->control.event_inj |= hvdb->events.vector |
@@ -5522,6 +5529,9 @@ void sev_snp_cancel_injection(struct kvm_vcpu *vcpu)
if (hvdb->events.nmi)
svm->vmcb->control.event_inj |= SVM_EVTINJ_TYPE_NMI;
+ if (hvdb->events.mce)
+ svm->vmcb->control.event_inj |= MC_VECTOR | SVM_EVTINJ_TYPE_EXEPT;
+
hvdb->events.pending_events = 0;
out:
@@ -5547,9 +5557,11 @@ bool sev_snp_blocked(enum inject_type type, struct kvm_vcpu *vcpu)
if (!hvdb)
return true;
- /* Indicate NMIs and interrupts blocked based on guest acknowledgment */
+ /* Indicate NMIs, MCEs and interrupts blocked based on guest acknowledgment */
if (type == INJECT_NMI)
blocked = hvdb->events.nmi;
+ else if (type == INJECT_MCE)
+ blocked = hvdb->events.mce;
else
blocked = !!hvdb->events.vector;
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 5255393986cc..295e02c17b9b 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4049,6 +4049,22 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection)
return 1;
}
+bool svm_mce_blocked(struct kvm_vcpu *vcpu)
+{
+ if (sev_snp_is_rinj_active(vcpu))
+ return sev_snp_blocked(INJECT_MCE, vcpu);
+
+ return false;
+}
+
+static int svm_mce_allowed(struct kvm_vcpu *vcpu)
+{
+ if (svm_mce_blocked(vcpu))
+ return 0;
+
+ return 1;
+}
+
static void svm_enable_irq_window(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -5362,6 +5378,7 @@ struct kvm_x86_ops svm_x86_ops __initdata = {
.cancel_injection = svm_cancel_injection,
.interrupt_allowed = svm_interrupt_allowed,
.nmi_allowed = svm_nmi_allowed,
+ .mce_allowed = svm_mce_allowed,
.get_nmi_mask = svm_get_nmi_mask,
.set_nmi_mask = svm_set_nmi_mask,
.enable_nmi_window = svm_enable_nmi_window,
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index bb0e5bfdb9a6..7d27ed7099a8 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -58,6 +58,7 @@ extern struct kvm_x86_ops svm_x86_ops __initdata;
enum inject_type {
INJECT_IRQ,
INJECT_NMI,
+ INJECT_MCE,
};
/*
@@ -801,6 +802,7 @@ void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
void disable_nmi_singlestep(struct vcpu_svm *svm);
bool svm_smi_blocked(struct kvm_vcpu *vcpu);
bool svm_nmi_blocked(struct kvm_vcpu *vcpu);
+bool svm_mce_blocked(struct kvm_vcpu *vcpu);
bool svm_interrupt_blocked(struct kvm_vcpu *vcpu);
void svm_set_gif(struct vcpu_svm *svm, bool value);
int svm_invoke_exit_handler(struct kvm_vcpu *vcpu, u64 exit_code);
diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index dbebddf648be..f9c4703dda54 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -812,6 +812,15 @@ static void vt_cancel_hv_timer(struct kvm_vcpu *vcpu)
}
#endif
+static int vt_mce_allowed(struct kvm_vcpu *vcpu)
+{
+ if (is_td_vcpu(vcpu))
+ return 0;
+
+ return vmx_mce_allowed(vcpu);
+}
+
+
static void vt_setup_mce(struct kvm_vcpu *vcpu)
{
if (is_td_vcpu(vcpu))
@@ -945,6 +954,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
.cancel_injection = vt_op(cancel_injection),
.interrupt_allowed = vt_op(interrupt_allowed),
.nmi_allowed = vt_op(nmi_allowed),
+ .mce_allowed = vt_op(mce_allowed),
.get_nmi_mask = vt_op(get_nmi_mask),
.set_nmi_mask = vt_op(set_nmi_mask),
.enable_nmi_window = vt_op(enable_nmi_window),
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index b9103de01428..a82a4197d18a 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -5220,6 +5220,11 @@ int vmx_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection)
return !vmx_interrupt_blocked(vcpu);
}
+int vmx_mce_allowed(struct kvm_vcpu *vcpu)
+{
+ return 1;
+}
+
int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
{
void __user *ret;
diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
index d09abeac2b56..b75dfe7f039d 100644
--- a/arch/x86/kvm/vmx/x86_ops.h
+++ b/arch/x86/kvm/vmx/x86_ops.h
@@ -92,6 +92,7 @@ void vmx_inject_exception(struct kvm_vcpu *vcpu);
void vmx_cancel_injection(struct kvm_vcpu *vcpu);
int vmx_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection);
int vmx_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection);
+int vmx_mce_allowed(struct kvm_vcpu *vcpu);
bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu);
void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked);
void vmx_enable_nmi_window(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0550359ed798..4b6b628efa21 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10794,6 +10794,12 @@ static int kvm_check_and_inject_events(struct kvm_vcpu *vcpu,
kvm_update_dr7(vcpu);
}
+ if (vcpu->arch.exception.vector == MC_VECTOR) {
+ r = static_call(kvm_x86_mce_allowed)(vcpu);
+ if (!r)
+ goto out_except;
+ }
+
kvm_inject_exception(vcpu);
vcpu->arch.exception.pending = false;
@@ -10801,6 +10807,7 @@ static int kvm_check_and_inject_events(struct kvm_vcpu *vcpu,
can_inject = false;
}
+out_except:
/* Don't inject interrupts if the user asked to avoid doing so */
if (vcpu->guest_debug & KVM_GUESTDBG_BLOCKIRQ)
--
2.53.0
^ permalink raw reply related
* [PATCH 42/60] kvm: x86: Make local APIC code aware of planes
From: Jörg Rödel @ 2026-06-08 14:42 UTC (permalink / raw)
To: Paolo Bonzini, Sean Christopherson
Cc: Tom Lendacky, ashish.kalra, michael.roth, nsaenz, anelkz,
James.Bottomley, Melody Wang, kvm, linux-kernel, kvmarm,
loongarch, linux-mips, linuxppc-dev, kvm-riscv, x86, coconut-svsm,
joerg.roedel
In-Reply-To: <20260608144252.351443-1-joro@8bytes.org>
From: Paolo Bonzini <pbonzini@redhat.com>
Make the local APIC code aware of planes and only operate on APICs
within the same plane level.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Co-developed-by: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
arch/x86/kvm/hyperv.c | 2 +-
arch/x86/kvm/ioapic.c | 8 +++----
arch/x86/kvm/irq.c | 8 ++++---
arch/x86/kvm/lapic.c | 50 +++++++++++++++++++++----------------------
arch/x86/kvm/lapic.h | 12 +++++------
arch/x86/kvm/x86.c | 6 +++---
arch/x86/kvm/xen.c | 2 +-
7 files changed, 45 insertions(+), 43 deletions(-)
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 4438ecac9a89..0a5d8e302f32 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -492,7 +492,7 @@ static int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint)
irq.vector = vector;
irq.level = 1;
- ret = kvm_irq_delivery_to_apic(vcpu->kvm, vcpu->arch.apic, &irq);
+ ret = kvm_irq_delivery_to_apic(vcpu->plane, vcpu->arch.apic, &irq);
trace_kvm_hv_synic_set_irq(vcpu->vcpu_id, sint, irq.vector, ret);
return ret;
}
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index eed96ff6e722..539edee73047 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -429,7 +429,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
irq.dest_id = e->fields.dest_id;
irq.msi_redir_hint = false;
bitmap_zero(vcpu_bitmap, KVM_MAX_VCPUS);
- kvm_bitmap_or_dest_vcpus(ioapic->kvm, &irq,
+ kvm_bitmap_or_dest_vcpus(ioapic->kvm->planes[0], &irq,
vcpu_bitmap);
if (old_dest_mode != e->fields.dest_mode ||
old_dest_id != e->fields.dest_id) {
@@ -442,7 +442,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
irq.dest_mode =
kvm_lapic_irq_dest_mode(
!!e->fields.dest_mode);
- kvm_bitmap_or_dest_vcpus(ioapic->kvm, &irq,
+ kvm_bitmap_or_dest_vcpus(ioapic->kvm->planes[0], &irq,
vcpu_bitmap);
}
kvm_make_scan_ioapic_request_mask(ioapic->kvm,
@@ -485,11 +485,11 @@ static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status)
* if rtc_irq_check_coalesced returns false).
*/
BUG_ON(ioapic->rtc_status.pending_eoi != 0);
- ret = __kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe,
+ ret = __kvm_irq_delivery_to_apic(ioapic->kvm->planes[0], NULL, &irqe,
&ioapic->rtc_status);
ioapic->rtc_status.pending_eoi = (ret < 0 ? 0 : ret);
} else
- ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe);
+ ret = kvm_irq_delivery_to_apic(ioapic->kvm->planes[0], NULL, &irqe);
if (ret && irqe.trig_mode == IOAPIC_LEVEL_TRIG)
entry->fields.remote_irr = 1;
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index bc748a4b7cbd..3bf2ecfd9cb4 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -226,6 +226,7 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
struct kvm *kvm, int irq_source_id, int level, bool line_status)
{
struct kvm_lapic_irq irq;
+ struct kvm_plane *plane;
if (kvm_msi_route_invalid(kvm, e))
return -EINVAL;
@@ -234,8 +235,9 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
return -1;
kvm_msi_to_lapic_irq(kvm, e, &irq);
+ plane = kvm->planes[e->msi.plane_level];
- return kvm_irq_delivery_to_apic(kvm, NULL, &irq);
+ return kvm_irq_delivery_to_apic(plane, NULL, &irq);
}
int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
@@ -258,7 +260,7 @@ int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
kvm_msi_to_lapic_irq(kvm, e, &irq);
- if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r))
+ if (kvm_irq_delivery_to_apic_fast(kvm->planes[e->msi.plane_level], NULL, &irq, &r))
return r;
break;
@@ -453,7 +455,7 @@ static int kvm_pi_update_irte(struct kvm_kernel_irqfd *irqfd,
* if they have a single CPU as the destination, e.g. only if
* the guest has affined the interrupt to a single vCPU.
*/
- if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) ||
+ if (!kvm_intr_is_single_vcpu(kvm->planes[0], &irq, &vcpu) ||
!kvm_irq_is_postable(&irq))
vcpu = NULL;
}
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 06a12b49fafa..cac076445472 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1153,7 +1153,7 @@ static int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
* means that the interrupt should be dropped. In this case, *bitmap would be
* zero and *dst undefined.
*/
-static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm,
+static inline bool kvm_apic_map_get_dest_lapic(struct kvm_plane *plane,
struct kvm_lapic **src, struct kvm_lapic_irq *irq,
struct kvm_apic_map *map, struct kvm_lapic ***dst,
unsigned long *bitmap)
@@ -1167,7 +1167,7 @@ static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm,
} else if (irq->shorthand)
return false;
- if (!map || kvm_apic_is_broadcast_dest(kvm, src, irq, map))
+ if (!map || kvm_apic_is_broadcast_dest(plane->kvm, src, irq, map))
return false;
if (irq->dest_mode == APIC_DEST_PHYSICAL) {
@@ -1208,7 +1208,7 @@ static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm,
bitmap, 16);
if (!(*dst)[lowest]) {
- kvm_apic_disabled_lapic_found(kvm);
+ kvm_apic_disabled_lapic_found(plane->kvm);
*bitmap = 0;
return true;
}
@@ -1219,7 +1219,7 @@ static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm,
return true;
}
-static bool __kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
+static bool __kvm_irq_delivery_to_apic_fast(struct kvm_plane *plane, struct kvm_lapic *src,
struct kvm_lapic_irq *irq, int *r,
struct rtc_status *rtc_status)
{
@@ -1232,7 +1232,7 @@ static bool __kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *s
*r = -1;
if (irq->shorthand == APIC_DEST_SELF) {
- if (KVM_BUG_ON(!src, kvm)) {
+ if (KVM_BUG_ON(!src, plane->kvm)) {
*r = 0;
return true;
}
@@ -1241,9 +1241,9 @@ static bool __kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *s
}
rcu_read_lock();
- map = rcu_dereference(kvm->planes[0]->arch.apic_map);
+ map = rcu_dereference(plane->arch.apic_map);
- ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dst, &bitmap);
+ ret = kvm_apic_map_get_dest_lapic(plane, &src, irq, map, &dst, &bitmap);
if (ret) {
*r = 0;
for_each_set_bit(i, &bitmap, 16) {
@@ -1258,10 +1258,10 @@ static bool __kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *s
}
-bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
+bool kvm_irq_delivery_to_apic_fast(struct kvm_plane *plane, struct kvm_lapic *src,
struct kvm_lapic_irq *irq, int *r)
{
- return __kvm_irq_delivery_to_apic_fast(kvm, src, irq, r, NULL);
+ return __kvm_irq_delivery_to_apic_fast(plane, src, irq, r, NULL);
}
/*
@@ -1278,7 +1278,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
* interrupt.
* - Otherwise, use remapped mode to inject the interrupt.
*/
-static bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm,
+static bool kvm_intr_is_single_vcpu_fast(struct kvm_plane *plane,
struct kvm_lapic_irq *irq,
struct kvm_vcpu **dest_vcpu)
{
@@ -1291,9 +1291,9 @@ static bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm,
return false;
rcu_read_lock();
- map = rcu_dereference(kvm->planes[0]->arch.apic_map);
+ map = rcu_dereference(plane->arch.apic_map);
- if (kvm_apic_map_get_dest_lapic(kvm, NULL, irq, map, &dst, &bitmap) &&
+ if (kvm_apic_map_get_dest_lapic(plane, NULL, irq, map, &dst, &bitmap) &&
hweight16(bitmap) == 1) {
unsigned long i = find_first_bit(&bitmap, 16);
@@ -1307,17 +1307,17 @@ static bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm,
return ret;
}
-bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
+bool kvm_intr_is_single_vcpu(struct kvm_plane *plane, struct kvm_lapic_irq *irq,
struct kvm_vcpu **dest_vcpu)
{
int r = 0;
unsigned long i;
struct kvm_vcpu *vcpu;
- if (kvm_intr_is_single_vcpu_fast(kvm, irq, dest_vcpu))
+ if (kvm_intr_is_single_vcpu_fast(plane, irq, dest_vcpu))
return true;
- kvm_for_each_vcpu(i, vcpu, kvm) {
+ plane_for_each_vcpu(i, vcpu, plane) {
if (!kvm_apic_present(vcpu))
continue;
@@ -1335,7 +1335,7 @@ bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
}
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_intr_is_single_vcpu);
-int __kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
+int __kvm_irq_delivery_to_apic(struct kvm_plane *plane, struct kvm_lapic *src,
struct kvm_lapic_irq *irq,
struct rtc_status *rtc_status)
{
@@ -1344,7 +1344,7 @@ int __kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
unsigned long i, dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)];
unsigned int dest_vcpus = 0;
- if (__kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, rtc_status))
+ if (__kvm_irq_delivery_to_apic_fast(plane, src, irq, &r, rtc_status))
return r;
if (irq->dest_mode == APIC_DEST_PHYSICAL &&
@@ -1355,7 +1355,7 @@ int __kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
memset(dest_vcpu_bitmap, 0, sizeof(dest_vcpu_bitmap));
- kvm_for_each_vcpu(i, vcpu, kvm) {
+ plane_for_each_vcpu(i, vcpu, plane) {
if (!kvm_apic_present(vcpu))
continue;
@@ -1384,7 +1384,7 @@ int __kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
int idx = kvm_vector_to_index(irq->vector, dest_vcpus,
dest_vcpu_bitmap, KVM_MAX_VCPUS);
- lowest = kvm_get_vcpu(kvm, idx);
+ lowest = plane_get_vcpu(plane, idx);
}
if (lowest)
@@ -1500,7 +1500,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
* out the destination vcpus array and set the bitmap or it traverses to
* each available vcpu to identify the same.
*/
-void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq,
+void kvm_bitmap_or_dest_vcpus(struct kvm_plane *plane, struct kvm_lapic_irq *irq,
unsigned long *vcpu_bitmap)
{
struct kvm_lapic **dest_vcpu = NULL;
@@ -1512,9 +1512,9 @@ void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq,
bool ret;
rcu_read_lock();
- map = rcu_dereference(kvm->planes[0]->arch.apic_map);
+ map = rcu_dereference(plane->arch.apic_map);
- ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dest_vcpu,
+ ret = kvm_apic_map_get_dest_lapic(plane, &src, irq, map, &dest_vcpu,
&bitmap);
if (ret) {
for_each_set_bit(i, &bitmap, 16) {
@@ -1524,7 +1524,7 @@ void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq,
__set_bit(vcpu_idx, vcpu_bitmap);
}
} else {
- kvm_for_each_vcpu(i, vcpu, kvm) {
+ plane_for_each_vcpu(i, vcpu, plane) {
if (!kvm_apic_present(vcpu))
continue;
if (!kvm_apic_match_dest(vcpu, NULL,
@@ -1651,7 +1651,7 @@ void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high)
trace_kvm_apic_ipi(icr_low, irq.dest_id);
- kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq);
+ kvm_irq_delivery_to_apic(apic->vcpu->plane, apic, &irq);
}
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_apic_send_ipi);
@@ -2619,7 +2619,7 @@ static int __kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data, bool fast)
kvm_icr_to_lapic_irq(apic, (u32)data, (u32)(data >> 32), &irq);
- if (!kvm_irq_delivery_to_apic_fast(apic->vcpu->kvm, apic, &irq,
+ if (!kvm_irq_delivery_to_apic_fast(apic->vcpu->plane, apic, &irq,
&ignored))
return -EWOULDBLOCK;
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index afd440c88981..a9ede0e145d9 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -116,17 +116,17 @@ void kvm_apic_update_apicv(struct kvm_vcpu *vcpu);
int kvm_alloc_apic_access_page(struct kvm *kvm);
void kvm_inhibit_apic_access_page(struct kvm_vcpu *vcpu);
-bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
+bool kvm_irq_delivery_to_apic_fast(struct kvm_plane *plane, struct kvm_lapic *src,
struct kvm_lapic_irq *irq, int *r);
-int __kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
+int __kvm_irq_delivery_to_apic(struct kvm_plane *plane, struct kvm_lapic *src,
struct kvm_lapic_irq *irq,
struct rtc_status *rtc_status);
-static inline int kvm_irq_delivery_to_apic(struct kvm *kvm,
+static inline int kvm_irq_delivery_to_apic(struct kvm_plane *plane,
struct kvm_lapic *src,
struct kvm_lapic_irq *irq)
{
- return __kvm_irq_delivery_to_apic(kvm, src, irq, NULL);
+ return __kvm_irq_delivery_to_apic(plane, src, irq, NULL);
}
void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high);
@@ -244,10 +244,10 @@ bool kvm_lapic_suppress_eoi_broadcast(struct kvm_lapic *apic);
void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu);
-void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq,
+void kvm_bitmap_or_dest_vcpus(struct kvm_plane *plane, struct kvm_lapic_irq *irq,
unsigned long *vcpu_bitmap);
-bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
+bool kvm_intr_is_single_vcpu(struct kvm_plane *plane, struct kvm_lapic_irq *irq,
struct kvm_vcpu **dest_vcpu);
void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu);
void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 070f87ae23eb..7fc08df245bd 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10373,7 +10373,7 @@ static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr,
*
* @apicid - apicid of vcpu to be kicked.
*/
-static void kvm_pv_kick_cpu_op(struct kvm *kvm, int apicid)
+static void kvm_pv_kick_cpu_op(struct kvm_plane *plane, int apicid)
{
/*
* All other fields are unused for APIC_DM_REMRD, but may be consumed by
@@ -10386,7 +10386,7 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, int apicid)
.dest_id = apicid,
};
- kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq);
+ kvm_irq_delivery_to_apic(plane, NULL, &lapic_irq);
}
bool kvm_apicv_activated(struct kvm *kvm)
@@ -10515,7 +10515,7 @@ int ____kvm_emulate_hypercall(struct kvm_vcpu *vcpu, int cpl,
if (!guest_pv_has(vcpu, KVM_FEATURE_PV_UNHALT))
break;
- kvm_pv_kick_cpu_op(vcpu->kvm, a1);
+ kvm_pv_kick_cpu_op(vcpu->plane, a1);
kvm_sched_yield(vcpu, a1);
ret = 0;
break;
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 91fd3673c09a..06c5789f406b 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -626,7 +626,7 @@ void kvm_xen_inject_vcpu_vector(struct kvm_vcpu *v)
irq.delivery_mode = APIC_DM_FIXED;
irq.level = 1;
- kvm_irq_delivery_to_apic(v->kvm, NULL, &irq);
+ kvm_irq_delivery_to_apic(v->plane, NULL, &irq);
}
/*
--
2.53.0
^ permalink raw reply related
* [PATCH 10/60] kvm: Move vcpu_array to struct kvm_plane
From: Jörg Rödel @ 2026-06-08 14:42 UTC (permalink / raw)
To: Paolo Bonzini, Sean Christopherson
Cc: Tom Lendacky, ashish.kalra, michael.roth, nsaenz, anelkz,
James.Bottomley, Melody Wang, kvm, linux-kernel, kvmarm,
loongarch, linux-mips, linuxppc-dev, kvm-riscv, x86, coconut-svsm,
joerg.roedel
In-Reply-To: <20260608144252.351443-1-joro@8bytes.org>
From: Paolo Bonzini <pbonzini@redhat.com>
Each plane will have its own set of VCPUs, so move the vcpu_array to
the plane structure.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Co-developed-by: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
include/linux/kvm_host.h | 30 +++++++++++++++++++++---------
virt/kvm/kvm_main.c | 33 +++++++++++++++++++++++++--------
2 files changed, 46 insertions(+), 17 deletions(-)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 5be4c9f118b4..5a72f73a2f31 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -324,12 +324,14 @@ struct kvm_mmio_fragment {
struct kvm_vcpu {
struct kvm *kvm;
+ struct kvm_plane *plane;
+
#ifdef CONFIG_PREEMPT_NOTIFIERS
struct preempt_notifier preempt_notifier;
#endif
int cpu;
int vcpu_id; /* id given by userspace at creation */
- int vcpu_idx; /* index into kvm->vcpu_array */
+ int vcpu_idx; /* index into kvm->planes[]->vcpu_array */
int ____srcu_idx; /* Don't use this directly. You've been warned. */
#ifdef CONFIG_PROVE_RCU
int srcu_depth;
@@ -770,6 +772,9 @@ struct kvm_memslots {
struct kvm_plane {
struct kvm *kvm;
unsigned level;
+
+ /* Per-Plane VCPU array */
+ struct xarray vcpu_array;
};
struct kvm {
@@ -795,7 +800,6 @@ struct kvm {
struct kvm_memslots __memslots[KVM_MAX_NR_ADDRESS_SPACES][2];
/* The current active memslot set for each address space */
struct kvm_memslots __rcu *memslots[KVM_MAX_NR_ADDRESS_SPACES];
- struct xarray vcpu_array;
/*
* Protected by slots_lock, but can be read outside if an
* incorrect answer is acceptable.
@@ -996,9 +1000,9 @@ static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx)
lockdep_is_held(&kvm->slots_lock));
}
-static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
+static inline struct kvm_vcpu *plane_get_vcpu(struct kvm_plane *plane, int i)
{
- int num_vcpus = atomic_read(&kvm->online_vcpus);
+ int num_vcpus = atomic_read(&plane->kvm->online_vcpus);
/*
* Explicitly verify the target vCPU is online, as the anti-speculation
@@ -1012,13 +1016,21 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
/* Pairs with smp_wmb() in kvm_vm_ioctl_create_vcpu. */
smp_rmb();
- return xa_load(&kvm->vcpu_array, i);
+ return xa_load(&plane->vcpu_array, i);
}
-#define kvm_for_each_vcpu(idx, vcpup, kvm) \
- if (atomic_read(&kvm->online_vcpus)) \
- xa_for_each_range(&kvm->vcpu_array, idx, vcpup, 0, \
- (atomic_read(&kvm->online_vcpus) - 1))
+static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
+{
+ return plane_get_vcpu(kvm->planes[0], i);
+}
+
+#define plane_for_each_vcpu(idx, vcpup, plane) \
+ if (atomic_read(&plane->kvm->online_vcpus)) \
+ xa_for_each_range(&plane->vcpu_array, idx, vcpup, 0, \
+ (atomic_read(&plane->kvm->online_vcpus) - 1))
+
+#define kvm_for_each_vcpu(idx, vcpup, kvm) \
+ plane_for_each_vcpu(idx, vcpup, kvm->planes[0])
static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id)
{
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a68469c6d12e..668645dd3945 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -443,6 +443,7 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
mutex_init(&vcpu->mutex);
vcpu->cpu = -1;
vcpu->kvm = kvm;
+ vcpu->plane = kvm->planes[0];
vcpu->vcpu_id = id;
vcpu->pid = NULL;
rwlock_init(&vcpu->pid_lock);
@@ -479,14 +480,14 @@ static void kvm_vcpu_destroy(struct kvm_vcpu *vcpu)
kmem_cache_free(kvm_vcpu_cache, vcpu);
}
-void kvm_destroy_vcpus(struct kvm *kvm)
+static void plane_destroy_vcpus(struct kvm_plane *plane)
{
unsigned long i;
struct kvm_vcpu *vcpu;
- kvm_for_each_vcpu(i, vcpu, kvm) {
+ plane_for_each_vcpu(i, vcpu, plane) {
kvm_vcpu_destroy(vcpu);
- xa_erase(&kvm->vcpu_array, i);
+ xa_erase(&plane->vcpu_array, i);
/*
* Assert that the vCPU isn't visible in any way, to ensure KVM
@@ -494,7 +495,22 @@ void kvm_destroy_vcpus(struct kvm *kvm)
* in VM-wide request, e.g. to flush remote TLBs when tearing
* down MMUs, or to mark the VM dead if a KVM_BUG_ON() fires.
*/
- WARN_ON_ONCE(xa_load(&kvm->vcpu_array, i) || kvm_get_vcpu(kvm, i));
+ WARN_ON_ONCE(xa_load(&plane->vcpu_array, i) || plane_get_vcpu(plane, i));
+ }
+
+}
+
+void kvm_destroy_vcpus(struct kvm *kvm)
+{
+ unsigned lvl;
+
+ for (lvl = KVM_MAX_PLANES; lvl > 0; lvl--) {
+ struct kvm_plane *plane = kvm->planes[lvl - 1];
+
+ if (plane == NULL)
+ continue;
+
+ plane_destroy_vcpus(plane);
}
atomic_set(&kvm->online_vcpus, 0);
@@ -1105,6 +1121,8 @@ static struct kvm_plane *kvm_create_plane(struct kvm *kvm, unsigned plane_level)
plane->kvm = kvm;
plane->level = plane_level;
+ xa_init(&plane->vcpu_array);
+
kvm->planes[plane_level] = plane;
return plane;
@@ -1146,7 +1164,6 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)
mutex_init(&kvm->slots_arch_lock);
spin_lock_init(&kvm->mn_invalidate_lock);
rcuwait_init(&kvm->mn_memslots_update_rcuwait);
- xa_init(&kvm->vcpu_array);
#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES
xa_init(&kvm->mem_attr_array);
#endif
@@ -4039,7 +4056,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
if (idx == me->vcpu_idx)
continue;
- vcpu = xa_load(&kvm->vcpu_array, idx);
+ vcpu = xa_load(&kvm->planes[0]->vcpu_array, idx);
if (!READ_ONCE(vcpu->ready))
continue;
if (kvm_vcpu_is_blocking(vcpu) && !vcpu_dy_runnable(vcpu))
@@ -4258,7 +4275,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id)
}
vcpu->vcpu_idx = atomic_read(&kvm->online_vcpus);
- r = xa_insert(&kvm->vcpu_array, vcpu->vcpu_idx, vcpu, GFP_KERNEL_ACCOUNT);
+ r = xa_insert(&kvm->planes[0]->vcpu_array, vcpu->vcpu_idx, vcpu, GFP_KERNEL_ACCOUNT);
WARN_ON_ONCE(r == -EBUSY);
if (r)
goto unlock_vcpu_destroy;
@@ -4293,7 +4310,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id)
kvm_put_xa_erase:
mutex_unlock(&vcpu->mutex);
kvm_put_kvm_no_destroy(kvm);
- xa_erase(&kvm->vcpu_array, vcpu->vcpu_idx);
+ xa_erase(&kvm->planes[0]->vcpu_array, vcpu->vcpu_idx);
unlock_vcpu_destroy:
mutex_unlock(&kvm->lock);
kvm_dirty_ring_free(&vcpu->dirty_ring);
--
2.53.0
^ permalink raw reply related
* [PATCH 14/60] kvm: Make kvm_running_vcpus point to struct kvm_vcpu_common
From: Jörg Rödel @ 2026-06-08 14:42 UTC (permalink / raw)
To: Paolo Bonzini, Sean Christopherson
Cc: Tom Lendacky, ashish.kalra, michael.roth, nsaenz, anelkz,
James.Bottomley, Melody Wang, kvm, linux-kernel, kvmarm,
loongarch, linux-mips, linuxppc-dev, kvm-riscv, x86, coconut-svsm,
joerg.roedel
In-Reply-To: <20260608144252.351443-1-joro@8bytes.org>
From: Joerg Roedel <joerg.roedel@amd.com>
This will remove the need to update kvm_running_vcpu on plane
switches.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
arch/arm64/kvm/arch_timer.c | 3 ++-
arch/arm64/kvm/vgic/vgic-init.c | 3 ++-
include/linux/kvm_host.h | 2 +-
virt/kvm/kvm_main.c | 18 +++++++++++-------
4 files changed, 16 insertions(+), 10 deletions(-)
diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index cbea4d9ee955..b2c4f422414e 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -204,7 +204,8 @@ static void soft_timer_cancel(struct hrtimer *hrt)
static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
{
- struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
+ struct kvm_vcpu_common *common = *(struct kvm_vcpu_common **)dev_id;
+ struct kvm_vcpu *vcpu = common->current_vcpu;
struct arch_timer_context *ctx;
struct timer_map map;
diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index 933983bb2005..a12b89b423d5 100644
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -730,7 +730,8 @@ void kvm_vgic_cpu_down(void)
static irqreturn_t vgic_maintenance_handler(int irq, void *data)
{
- struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)data;
+ struct kvm_vcpu_common *common = *(struct kvm_vcpu_common **)data;
+ struct kvm_vcpu *vcpu = common->current_vcpu;
/*
* We cannot rely on the vgic maintenance interrupt to be
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index b334c15d834e..d54f299218a4 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -2462,7 +2462,7 @@ static inline bool kvm_is_visible_memslot(struct kvm_memory_slot *memslot)
}
struct kvm_vcpu *kvm_get_running_vcpu(void);
-struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
+struct kvm_vcpu_common * __percpu *kvm_get_running_vcpus(void);
#if IS_ENABLED(CONFIG_HAVE_KVM_IRQ_BYPASS)
struct kvm_kernel_irqfd;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 2c16e124a507..9c07321e30f4 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -113,7 +113,7 @@ LIST_HEAD(vm_list);
static struct kmem_cache *kvm_vcpu_cache;
static __read_mostly struct preempt_ops kvm_preempt_ops;
-static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_running_vcpu);
+static DEFINE_PER_CPU(struct kvm_vcpu_common *, kvm_running_vcpu);
static struct dentry *kvm_debugfs_dir;
@@ -165,7 +165,7 @@ void vcpu_load(struct kvm_vcpu *vcpu)
{
int cpu = get_cpu();
- __this_cpu_write(kvm_running_vcpu, vcpu);
+ __this_cpu_write(kvm_running_vcpu, vcpu->common);
preempt_notifier_register(&vcpu->preempt_notifier);
kvm_arch_vcpu_load(vcpu, cpu);
put_cpu();
@@ -3954,7 +3954,7 @@ void __kvm_vcpu_kick(struct kvm_vcpu *vcpu, bool wait)
* kick" check does not need atomic operations if kvm_vcpu_kick is used
* within the vCPU thread itself.
*/
- if (vcpu == __this_cpu_read(kvm_running_vcpu)) {
+ if (vcpu == kvm_get_running_vcpu()) {
if (vcpu->mode == IN_GUEST_MODE)
WRITE_ONCE(vcpu->mode, EXITING_GUEST_MODE);
goto out;
@@ -6500,7 +6500,7 @@ static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
WRITE_ONCE(vcpu->preempted, false);
WRITE_ONCE(vcpu->ready, false);
- __this_cpu_write(kvm_running_vcpu, vcpu);
+ __this_cpu_write(kvm_running_vcpu, vcpu->common);
kvm_arch_vcpu_load(vcpu, cpu);
WRITE_ONCE(vcpu->scheduled_out, false);
@@ -6532,12 +6532,16 @@ static void kvm_sched_out(struct preempt_notifier *pn,
*/
struct kvm_vcpu *kvm_get_running_vcpu(void)
{
- struct kvm_vcpu *vcpu;
+ struct kvm_vcpu_common *common;
+ struct kvm_vcpu *vcpu = NULL;
preempt_disable();
- vcpu = __this_cpu_read(kvm_running_vcpu);
+ common = __this_cpu_read(kvm_running_vcpu);
preempt_enable();
+ if (common)
+ vcpu = common->current_vcpu;
+
return vcpu;
}
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_get_running_vcpu);
@@ -6545,7 +6549,7 @@ EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_get_running_vcpu);
/**
* kvm_get_running_vcpus - get the per-CPU array of currently running vcpus.
*/
-struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void)
+struct kvm_vcpu_common * __percpu *kvm_get_running_vcpus(void)
{
return &kvm_running_vcpu;
}
--
2.53.0
^ permalink raw reply related
* [PATCH 11/60] kvm: Introduce struct kvm_vcpu_common
From: Jörg Rödel @ 2026-06-08 14:42 UTC (permalink / raw)
To: Paolo Bonzini, Sean Christopherson
Cc: Tom Lendacky, ashish.kalra, michael.roth, nsaenz, anelkz,
James.Bottomley, Melody Wang, kvm, linux-kernel, kvmarm,
loongarch, linux-mips, linuxppc-dev, kvm-riscv, x86, coconut-svsm,
joerg.roedel, Carlos López
In-Reply-To: <20260608144252.351443-1-joro@8bytes.org>
From: Joerg Roedel <joerg.roedel@amd.com>
When creating one VCPU object per plane there is still a lot of VCPU
state which needs to be shared across all planes. Create struct
kvm_vcpu_common as a container for this shared state.
Co-developed-by: Carlos López <clopez@suse.de>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
include/linux/kvm_host.h | 10 ++++++++++
virt/kvm/kvm_main.c | 36 ++++++++++++++++++++++++++++++++++--
2 files changed, 44 insertions(+), 2 deletions(-)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 5a72f73a2f31..c4c4922df965 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -322,6 +322,13 @@ struct kvm_mmio_fragment {
unsigned int len;
};
+struct kvm_vcpu_common {
+ struct kvm *kvm;
+
+ /* Currently active VCPU */
+ struct kvm_vcpu *current_vcpu;
+};
+
struct kvm_vcpu {
struct kvm *kvm;
struct kvm_plane *plane;
@@ -400,6 +407,9 @@ struct kvm_vcpu {
*/
struct kvm_memory_slot *last_used_slot;
u64 last_used_slot_gen;
+
+ struct kvm_vcpu_common *common;
+ unsigned plane_level;
};
/*
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 668645dd3945..fb840d029c56 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -438,6 +438,20 @@ void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
}
#endif
+static int kvm_vcpu_init_common(struct kvm_vcpu *vcpu, struct kvm *kvm)
+{
+ struct kvm_vcpu_common *common = kzalloc(sizeof(*common), GFP_KERNEL_ACCOUNT);
+
+ if (common == NULL)
+ return -ENOMEM;
+
+ common->kvm = kvm;
+ common->current_vcpu = vcpu;
+ vcpu->common = common;
+
+ return 0;
+}
+
static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
{
mutex_init(&vcpu->mutex);
@@ -459,14 +473,26 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops);
vcpu->last_used_slot = NULL;
+ vcpu->plane_level = 0;
+
/* Fill the stats id string for the vcpu */
snprintf(vcpu->stats_id, sizeof(vcpu->stats_id), "kvm-%d/vcpu-%d",
task_pid_nr(current), id);
}
+static void kvm_vcpu_common_destroy(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->plane_level == 0)
+ kfree(vcpu->common);
+
+ vcpu->common = NULL;
+}
+
static void kvm_vcpu_destroy(struct kvm_vcpu *vcpu)
{
kvm_arch_vcpu_destroy(vcpu);
+
+ kvm_vcpu_common_destroy(vcpu);
kvm_dirty_ring_free(&vcpu->dirty_ring);
/*
@@ -1360,8 +1386,8 @@ static void kvm_destroy_vm(struct kvm *kvm)
#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES
xa_destroy(&kvm->mem_attr_array);
#endif
- kvm_arch_free_vm(kvm);
kvm_destroy_planes(kvm);
+ kvm_arch_free_vm(kvm);
preempt_notifier_dec();
kvm_disable_virtualization();
mmdrop(mm);
@@ -4246,11 +4272,15 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id)
goto vcpu_decrement;
}
+ r = kvm_vcpu_init_common(vcpu, kvm);
+ if (r)
+ goto vcpu_free;
+
BUILD_BUG_ON(sizeof(struct kvm_run) > PAGE_SIZE);
page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
if (!page) {
r = -ENOMEM;
- goto vcpu_free;
+ goto vcpu_free_common;
}
vcpu->run = page_address(page);
@@ -4318,6 +4348,8 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id)
kvm_arch_vcpu_destroy(vcpu);
vcpu_free_run_page:
free_page((unsigned long)vcpu->run);
+vcpu_free_common:
+ kvm_vcpu_common_destroy(vcpu);
vcpu_free:
kmem_cache_free(kvm_vcpu_cache, vcpu);
vcpu_decrement:
--
2.53.0
^ permalink raw reply related
* [PATCH 20/60] kvm: Move kvm_vcpu mode and requests field to struct kvm_vcpu_common
From: Jörg Rödel @ 2026-06-08 14:42 UTC (permalink / raw)
To: Paolo Bonzini, Sean Christopherson
Cc: Tom Lendacky, ashish.kalra, michael.roth, nsaenz, anelkz,
James.Bottomley, Melody Wang, kvm, linux-kernel, kvmarm,
loongarch, linux-mips, linuxppc-dev, kvm-riscv, x86, coconut-svsm,
joerg.roedel
In-Reply-To: <20260608144252.351443-1-joro@8bytes.org>
From: Joerg Roedel <joerg.roedel@amd.com>
These fields must be shared across all planes of a given VCPU.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
arch/powerpc/kvm/trace.h | 2 +-
arch/x86/kvm/trace.h | 2 +-
include/linux/kvm_host.h | 27 ++++++++++++++-------------
3 files changed, 16 insertions(+), 15 deletions(-)
diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h
index ea1d7c808319..35c000d918bb 100644
--- a/arch/powerpc/kvm/trace.h
+++ b/arch/powerpc/kvm/trace.h
@@ -108,7 +108,7 @@ TRACE_EVENT(kvm_check_requests,
TP_fast_assign(
__entry->cpu_nr = vcpu->vcpu_id;
- __entry->requests = vcpu->requests;
+ __entry->requests = vcpu->common->requests;
),
TP_printk("vcpu=%x requests=%x",
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 0db25bba17f6..0d2dd25bed12 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -409,7 +409,7 @@ TRACE_EVENT(name, \
__entry->guest_rip = tracing_kvm_rip_read(vcpu); \
__entry->isa = isa; \
__entry->vcpu_id = vcpu->vcpu_id; \
- __entry->requests = READ_ONCE(vcpu->requests); \
+ __entry->requests = READ_ONCE(vcpu->common->requests); \
kvm_x86_call(get_exit_info)(vcpu, \
&__entry->exit_reason, \
&__entry->info1, \
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 45286b3b35c9..7704820986da 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -180,7 +180,7 @@ static inline bool kvm_is_error_gpa(gpa_t gpa)
#define KVM_REQ_OUTSIDE_GUEST_MODE (KVM_REQUEST_NO_ACTION | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_ARCH_REQ_FLAGS(nr, flags) ({ \
- BUILD_BUG_ON((unsigned)(nr) >= (sizeof_field(struct kvm_vcpu, requests) * 8) - KVM_REQUEST_ARCH_BASE); \
+ BUILD_BUG_ON((unsigned)(nr) >= (sizeof_field(struct kvm_vcpu_common, requests) * 8) - KVM_REQUEST_ARCH_BASE); \
(unsigned)(((nr) + KVM_REQUEST_ARCH_BASE) | (flags)); \
})
#define KVM_ARCH_REQ(nr) KVM_ARCH_REQ_FLAGS(nr, 0)
@@ -341,6 +341,9 @@ struct kvm_vcpu_common {
struct rcuwait wait;
#endif
+ int mode;
+ u64 requests;
+
/* Scheduling state */
#ifdef CONFIG_PREEMPT_NOTIFIERS
struct preempt_notifier preempt_notifier;
@@ -359,8 +362,6 @@ struct kvm_vcpu {
int vcpu_id; /* id given by userspace at creation */
int vcpu_idx; /* index into kvm->planes[]->vcpu_array */
- int mode;
- u64 requests;
unsigned long guest_debug;
struct kvm_run *run;
@@ -442,27 +443,27 @@ static inline bool kvm_vcpu_scheduled_out(struct kvm_vcpu *vcpu)
static inline int kvm_vcpu_mode(struct kvm_vcpu *vcpu)
{
- return vcpu->mode;
+ return vcpu->common->mode;
}
static inline int kvm_vcpu_mode_acquire(struct kvm_vcpu *vcpu)
{
- return smp_load_acquire(&vcpu->mode);
+ return smp_load_acquire(&vcpu->common->mode);
}
static inline void kvm_vcpu_set_mode(struct kvm_vcpu *vcpu, int mode)
{
- vcpu->mode = mode;
+ vcpu->common->mode = mode;
}
static inline void kvm_vcpu_set_mode_mb(struct kvm_vcpu *vcpu, int mode)
{
- smp_store_mb(vcpu->mode, mode);
+ smp_store_mb(vcpu->common->mode, mode);
}
static inline void kvm_vcpu_set_mode_release(struct kvm_vcpu *vcpu, int mode)
{
- smp_store_release(&vcpu->mode, mode);
+ smp_store_release(&vcpu->common->mode, mode);
}
/*
@@ -630,7 +631,7 @@ static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
* memory barrier following the write of vcpu->mode in VCPU RUN.
*/
smp_mb__before_atomic();
- return cmpxchg(&vcpu->mode, IN_GUEST_MODE, EXITING_GUEST_MODE);
+ return cmpxchg(&vcpu->common->mode, IN_GUEST_MODE, EXITING_GUEST_MODE);
}
/*
@@ -2355,7 +2356,7 @@ static inline void __kvm_make_request(int req, struct kvm_vcpu *vcpu)
* caller. Paired with the smp_mb__after_atomic in kvm_check_request.
*/
smp_wmb();
- set_bit(req & KVM_REQUEST_MASK, (void *)&vcpu->requests);
+ set_bit(req & KVM_REQUEST_MASK, (void *)&vcpu->common->requests);
}
static __always_inline void kvm_make_request(int req, struct kvm_vcpu *vcpu)
@@ -2381,17 +2382,17 @@ static inline void kvm_make_request_and_kick(int req, struct kvm_vcpu *vcpu)
static inline bool kvm_request_pending(struct kvm_vcpu *vcpu)
{
- return READ_ONCE(vcpu->requests);
+ return READ_ONCE(vcpu->common->requests);
}
static inline bool kvm_test_request(int req, struct kvm_vcpu *vcpu)
{
- return test_bit(req & KVM_REQUEST_MASK, (void *)&vcpu->requests);
+ return test_bit(req & KVM_REQUEST_MASK, (void *)&vcpu->common->requests);
}
static inline void kvm_clear_request(int req, struct kvm_vcpu *vcpu)
{
- clear_bit(req & KVM_REQUEST_MASK, (void *)&vcpu->requests);
+ clear_bit(req & KVM_REQUEST_MASK, (void *)&vcpu->common->requests);
}
static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu)
--
2.53.0
^ permalink raw reply related
* [PATCH 28/60] kvm: Implement KVM_CAP_PLANES
From: Jörg Rödel @ 2026-06-08 14:42 UTC (permalink / raw)
To: Paolo Bonzini, Sean Christopherson
Cc: Tom Lendacky, ashish.kalra, michael.roth, nsaenz, anelkz,
James.Bottomley, Melody Wang, kvm, linux-kernel, kvmarm,
loongarch, linux-mips, linuxppc-dev, kvm-riscv, x86, coconut-svsm,
joerg.roedel
In-Reply-To: <20260608144252.351443-1-joro@8bytes.org>
From: Paolo Bonzini <pbonzini@redhat.com>
Introduce an architecture callback to query the VM-specific maximum
number of supported planes. Use that to implement the KVM_CAP_PLANES
capability check.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Co-developed-by: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
arch/arm64/kvm/arm.c | 5 +++++
arch/loongarch/kvm/vm.c | 5 +++++
arch/mips/kvm/mips.c | 5 +++++
arch/powerpc/kvm/powerpc.c | 5 +++++
arch/riscv/kvm/main.c | 5 +++++
arch/s390/kvm/kvm-s390.c | 5 +++++
arch/x86/kvm/x86.c | 5 +++++
include/linux/kvm_host.h | 2 ++
virt/kvm/kvm_main.c | 4 ++++
9 files changed, 41 insertions(+)
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 1e2f42134b74..7e6d2773fd39 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -200,6 +200,11 @@ static int kvm_arm_default_max_vcpus(void)
return vgic_present ? kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS;
}
+unsigned kvm_arch_max_planes(struct kvm *kvm)
+{
+ return 1;
+}
+
/**
* kvm_arch_init_vm - initializes a VM data structure
* @kvm: pointer to the KVM struct
diff --git a/arch/loongarch/kvm/vm.c b/arch/loongarch/kvm/vm.c
index 1317c718f896..14f1232c6e0c 100644
--- a/arch/loongarch/kvm/vm.c
+++ b/arch/loongarch/kvm/vm.c
@@ -109,6 +109,11 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm->arch.phyid_map = NULL;
}
+unsigned kvm_arch_max_planes(struct kvm *kvm)
+{
+ return 1;
+}
+
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
int r;
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 776aba0af096..60870452119d 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -115,6 +115,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
return !!(vcpu->arch.pending_exceptions);
}
+unsigned kvm_arch_max_planes(struct kvm *kvm)
+{
+ return 1;
+}
+
bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
{
return false;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 5d94e0f676ec..cfa40be20e00 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -499,6 +499,11 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
module_put(kvm->arch.kvm_ops->owner);
}
+unsigned kvm_arch_max_planes(struct kvm *kvm)
+{
+ return 1;
+}
+
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
int r;
diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c
index cb8a65273c1f..5adba3a455a3 100644
--- a/arch/riscv/kvm/main.c
+++ b/arch/riscv/kvm/main.c
@@ -17,6 +17,11 @@
DEFINE_STATIC_KEY_FALSE(kvm_riscv_vsstage_tlb_no_gpa);
+unsigned kvm_arch_max_planes(struct kvm *kvm)
+{
+ return 1;
+}
+
static void kvm_riscv_setup_vendor_features(void)
{
/* Andes AX66: split two-stage TLBs */
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index e6fe83da172f..24f24ea95f86 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -3186,6 +3186,11 @@ static void sca_dispose(struct kvm *kvm)
kvm->arch.sca = NULL;
}
+unsigned kvm_arch_max_planes(struct kvm *kvm)
+{
+ return 1;
+}
+
void kvm_arch_free_vm(struct kvm *kvm)
{
if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 50601ac2828f..25299c8c28e3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -482,6 +482,11 @@ static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all_except_vmx) +
(KVM_LAST_EMULATED_VMX_MSR - KVM_FIRST_EMULATED_VMX_MSR + 1)];
static unsigned int num_msr_based_features;
+unsigned kvm_arch_max_planes(struct kvm *kvm)
+{
+ return 1;
+}
+
/*
* All feature MSRs except uCode revID, which tracks the currently loaded uCode
* patch, are immutable once the vCPU model is defined.
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 291bccce9b74..3ecd472c7cfa 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1156,6 +1156,8 @@ void kvm_unlock_all_vcpus(struct kvm *kvm);
void vcpu_load(struct kvm_vcpu *vcpu);
void vcpu_put(struct kvm_vcpu *vcpu);
+unsigned kvm_arch_max_planes(struct kvm *kvm);
+
#ifdef CONFIG_KVM_IOAPIC
void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm);
#else
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 7a0b632e3ac0..5a0277e2ac7c 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -5079,6 +5079,10 @@ static int kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
case KVM_CAP_GUEST_MEMFD_FLAGS:
return kvm_gmem_get_supported_flags(kvm);
#endif
+ case KVM_CAP_PLANES:
+ if (kvm)
+ return kvm_arch_max_planes(kvm);
+ return 1;
default:
break;
}
--
2.53.0
^ permalink raw reply related
* [PATCH 47/60] kvm: x86: Select a plane to run
From: Jörg Rödel @ 2026-06-08 14:42 UTC (permalink / raw)
To: Paolo Bonzini, Sean Christopherson
Cc: Tom Lendacky, ashish.kalra, michael.roth, nsaenz, anelkz,
James.Bottomley, Melody Wang, kvm, linux-kernel, kvmarm,
loongarch, linux-mips, linuxppc-dev, kvm-riscv, x86, coconut-svsm,
joerg.roedel
In-Reply-To: <20260608144252.351443-1-joro@8bytes.org>
From: Joerg Roedel <joerg.roedel@amd.com>
In the KVM_RUN path, select a runnable VCPU plane and use it to enter
the guest. Also handle KVM_REQ_PLANE_RESCHED events to switch planes
without exiting to user-space.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
arch/x86/kvm/x86.c | 29 ++++++++++++++++++++++++++++-
1 file changed, 28 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7e94a378b3d2..b9828cd31136 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -11398,6 +11398,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
goto out;
}
}
+
+ if (kvm_check_request(KVM_REQ_PLANE_RESCHED, vcpu)) {
+ vcpu->common->plane_switch = true;
+ r = 0;
+ goto out;
+ }
}
if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win ||
@@ -12076,7 +12082,7 @@ static int kvm_x86_vcpu_pre_run(struct kvm_vcpu *vcpu)
return kvm_x86_call(vcpu_pre_run)(vcpu);
}
-int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
+static int __kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{
struct kvm_queued_exception *ex = &vcpu->arch.exception;
struct kvm_run *kvm_run = vcpu->run;
@@ -12196,6 +12202,27 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
return r;
}
+int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu_plane0)
+{
+ struct kvm_vcpu_common *common = vcpu_plane0->common;
+ int ret;
+
+ do {
+ struct kvm_vcpu *vcpu = kvm_vcpu_select_plane(vcpu_plane0);
+
+ if (!vcpu)
+ return -EINVAL;
+
+ common->plane_switch = false;
+
+ ret = __kvm_arch_vcpu_ioctl_run(vcpu);
+ if (ret)
+ break;
+ } while (common->plane_switch);
+
+ return ret;
+}
+
static void __get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
if (vcpu->arch.emulate_regs_need_sync_to_vcpu) {
--
2.53.0
^ permalink raw reply related
* [PATCH 06/60] KVM: SVM: Enable Restricted Injection for an SEV-SNP guest
From: Jörg Rödel @ 2026-06-08 14:41 UTC (permalink / raw)
To: Paolo Bonzini, Sean Christopherson
Cc: Tom Lendacky, ashish.kalra, michael.roth, nsaenz, anelkz,
James.Bottomley, Melody Wang, kvm, linux-kernel, kvmarm,
loongarch, linux-mips, linuxppc-dev, kvm-riscv, x86, coconut-svsm,
joerg.roedel
In-Reply-To: <20260608144252.351443-1-joro@8bytes.org>
From: Melody Wang <huibo.wang@amd.com>
Enable Restricted Injection in an SEV-SNP guest by setting the corresponding
bit in the VMSA SEV features field (SEV_FEATURES[3]) from QEMU.
Add Restricted Injection to the supported hypervisor features.
Co-developed-by: Thomas Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Thomas Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Melody Wang <huibo.wang@amd.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
arch/x86/include/asm/cpufeatures.h | 1 +
arch/x86/include/asm/sev-common.h | 1 +
arch/x86/kvm/svm/sev.c | 26 +++++++++++++++++++++++++-
3 files changed, 27 insertions(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 1d506e5d6f46..41af7bd2473c 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -453,6 +453,7 @@
#define X86_FEATURE_SNP_SECURE_TSC (19*32+ 8) /* SEV-SNP Secure TSC */
#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* Virtual TSC_AUX */
#define X86_FEATURE_SME_COHERENT (19*32+10) /* hardware-enforced cache coherency */
+#define X86_FEATURE_RESTRICTED_INJECTION (19*32+12) /* Restricted Injection */
#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" SEV-ES full debug state swap support */
#define X86_FEATURE_RMPREAD (19*32+21) /* RMPREAD instruction */
#define X86_FEATURE_SEGMENTED_RMP (19*32+23) /* Segmented RMP support */
diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h
index 01a6e4dbe423..ee17a3541b55 100644
--- a/arch/x86/include/asm/sev-common.h
+++ b/arch/x86/include/asm/sev-common.h
@@ -136,6 +136,7 @@ enum psc_op {
#define GHCB_HV_FT_SNP BIT_ULL(0)
#define GHCB_HV_FT_SNP_AP_CREATION BIT_ULL(1)
+#define GHCB_HV_FT_SNP_RINJ (BIT_ULL(2) | GHCB_HV_FT_SNP_AP_CREATION)
#define GHCB_HV_FT_SNP_MULTI_VMPL BIT_ULL(5)
/*
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 6d5d66563b0d..369fb1e36f58 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -39,7 +39,9 @@
#define GHCB_VERSION_MAX 2ULL
#define GHCB_VERSION_MIN 1ULL
-#define GHCB_HV_FT_SUPPORTED (GHCB_HV_FT_SNP | GHCB_HV_FT_SNP_AP_CREATION)
+#define GHCB_HV_FT_SUPPORTED (GHCB_HV_FT_SNP | \
+ GHCB_HV_FT_SNP_AP_CREATION | \
+ GHCB_HV_FT_SNP_RINJ)
/*
* The GHCB spec essentially states that all non-zero error codes other than
@@ -63,6 +65,10 @@ module_param_named(sev_es, sev_es_enabled, bool, 0444);
static bool __ro_after_init sev_snp_enabled = true;
module_param_named(sev_snp, sev_snp_enabled, bool, 0444);
+/* enable/disable SEV-SNP Restricted Injection support */
+static bool sev_snp_restricted_injection_enabled = true;
+module_param_named(restricted_injection, sev_snp_restricted_injection_enabled, bool, 0444);
+
static unsigned int __ro_after_init nr_ciphertext_hiding_asids;
module_param_named(ciphertext_hiding_asids, nr_ciphertext_hiding_asids, uint, 0444);
@@ -3223,6 +3229,12 @@ void __init sev_hardware_setup(void)
if (sev_snp_enabled && tsc_khz && cpu_feature_enabled(X86_FEATURE_SNP_SECURE_TSC))
sev_supported_vmsa_features |= SVM_SEV_FEAT_SECURE_TSC;
+
+ if (!sev_snp_enabled || !cpu_feature_enabled(X86_FEATURE_RESTRICTED_INJECTION))
+ sev_snp_restricted_injection_enabled = false;
+
+ if (sev_snp_restricted_injection_enabled)
+ sev_supported_vmsa_features |= SVM_SEV_FEAT_RESTRICTED_INJECTION;
}
void sev_hardware_unsetup(void)
@@ -4773,10 +4785,20 @@ void sev_vcpu_after_set_cpuid(struct vcpu_svm *svm)
vcpu->arch.reserved_gpa_bits &= ~(1UL << (best->ebx & 0x3f));
}
+static void sev_snp_init_vmcb(struct vcpu_svm *svm)
+{
+ struct kvm_sev_info *sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info;
+
+ /* V_NMI is not supported when Restricted Injection is enabled */
+ if (sev->vmsa_features & SVM_SEV_FEAT_RESTRICTED_INJECTION)
+ svm->vmcb->control.int_ctl &= ~V_NMI_ENABLE_MASK;
+}
+
static void sev_es_init_vmcb(struct vcpu_svm *svm, bool init_event)
{
struct kvm_sev_info *sev = to_kvm_sev_info(svm->vcpu.kvm);
struct vmcb *vmcb = svm->vmcb01.ptr;
+ struct kvm_vcpu *vcpu = &svm->vcpu;
svm->vmcb->control.misc_ctl |= SVM_MISC_ENABLE_SEV_ES;
@@ -4843,6 +4865,8 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm, bool init_event)
set_ghcb_msr(svm, GHCB_MSR_SEV_INFO((__u64)sev->ghcb_version,
GHCB_VERSION_MIN,
sev_enc_bit));
+ if (is_sev_snp_guest(vcpu))
+ sev_snp_init_vmcb(svm);
}
void sev_init_vmcb(struct vcpu_svm *svm, bool init_event)
--
2.53.0
^ permalink raw reply related
* [PATCH 57/60] kvm: sev: Allow for VMPL level specification in AP create
From: Jörg Rödel @ 2026-06-08 14:42 UTC (permalink / raw)
To: Paolo Bonzini, Sean Christopherson
Cc: Tom Lendacky, ashish.kalra, michael.roth, nsaenz, anelkz,
James.Bottomley, Melody Wang, kvm, linux-kernel, kvmarm,
loongarch, linux-mips, linuxppc-dev, kvm-riscv, x86, coconut-svsm,
joerg.roedel
In-Reply-To: <20260608144252.351443-1-joro@8bytes.org>
From: Tom Lendacky <thomas.lendacky@amd.com>
Update AP creation to support ADD/DESTROY of VMSAs at levels other than
VMPL0 in order to run under an SVSM at VMPL1 or lower. To maintain
backwards compatibility, the VMPL is specified in bits 16 to 19 of the
AP Creation request in SW_EXITINFO1 of the GHCB.
In order to track the VMSAs at different levels, create arrays for the
VMSAs, GHCBs, registered GHCBs and others. When switching VMPL levels,
these entries will be used to set the VMSA and GHCB physical addresses
in the VMCB for the VMPL level.
In order to ensure that the proper responses are returned in the proper GHCB,
the GHCB must be unmapped at the current level and saved for restoration
later when switching back to that VMPL level.
Additional checks are applied to prevent a non-VMPL0 vCPU from being able
to perform an AP creation request at VMPL0. Additionally, a vCPU cannot
replace its own VMSA.
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Co-developed-by: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
arch/x86/include/asm/svm.h | 9 +++
arch/x86/include/uapi/asm/svm.h | 2 +
arch/x86/kvm/svm/sev.c | 134 +++++++++++++++++++++++++-------
arch/x86/kvm/svm/svm.h | 1 +
arch/x86/kvm/x86.c | 9 +++
5 files changed, 126 insertions(+), 29 deletions(-)
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 9822b0b346ae..32a35ee10bce 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -345,6 +345,15 @@ static_assert((X2AVIC_4K_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AV
#define VMCB_ALLOWED_SEV_FEATURES_VALID BIT_ULL(63)
+enum {
+ SVM_SEV_VMPL0 = 0,
+ SVM_SEV_VMPL1,
+ SVM_SEV_VMPL2,
+ SVM_SEV_VMPL3,
+
+ SVM_SEV_VMPL_MAX
+};
+
struct vmcb_seg {
u16 selector;
u16 attrib;
diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h
index 91395b82eadd..60b7a52f6f7e 100644
--- a/arch/x86/include/uapi/asm/svm.h
+++ b/arch/x86/include/uapi/asm/svm.h
@@ -123,6 +123,8 @@
#define SVM_VMGEXIT_HVDB_QUERY 2
#define SVM_VMGEXIT_HVDB_CLEAR 3
#define SVM_VMGEXIT_HV_IPI 0x80000015ull
+#define SVM_VMGEXIT_AP_VMPL_MASK GENMASK(19, 16)
+#define SVM_VMGEXIT_AP_VMPL_SHIFT 16
#define SVM_VMGEXIT_GET_APIC_IDS 0x80000017ull
#define SVM_VMGEXIT_SNP_RUN_VMPL 0x80000018ull
#define SVM_VMGEXIT_SAVIC 0x8000001aull
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index c0b2879f8e9f..53cd3aba7368 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -3512,13 +3512,19 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
if (!kvm_ghcb_sw_scratch_is_valid(svm))
goto vmgexit_err;
break;
- case SVM_VMGEXIT_AP_CREATION:
+ case SVM_VMGEXIT_AP_CREATION: {
+ unsigned int request;
+
if (!is_sev_snp_guest(vcpu))
goto vmgexit_err;
- if (lower_32_bits(control->exit_info_1) != SVM_VMGEXIT_AP_DESTROY)
+
+ request = lower_32_bits(control->exit_info_1);
+ request &= ~SVM_VMGEXIT_AP_VMPL_MASK;
+ if (request != SVM_VMGEXIT_AP_DESTROY)
if (!kvm_ghcb_rax_is_valid(svm))
goto vmgexit_err;
break;
+ }
case SVM_VMGEXIT_GET_APIC_IDS:
if (!kvm_ghcb_rax_is_valid(svm))
goto vmgexit_err;
@@ -4151,8 +4157,26 @@ static void sev_snp_init_protected_guest_state(struct kvm_vcpu *vcpu)
/* Use the new VMSA */
svm->vmcb->control.vmsa_pa = pfn_to_hpa(pfn);
+ /*
+ * The vCPU may not have gone through the LAUNCH_UPDATE process, so mark
+ * the guest state as protected.
+ */
+ vcpu->arch.guest_state_protected = true;
+
+ /*
+ * SEV-ES guest mandates LBR Virtualization to be _always_ ON. Enable it
+ * only after setting guest_state_protected because KVM_SET_MSRS allows
+ * dynamic toggling of LBRV (for performance reason) on write access to
+ * MSR_IA32_DEBUGCTLMSR when guest_state_protected is not set.
+ */
+ svm_enable_lbrv(vcpu);
+
/* Mark the vCPU as runnable */
- kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE);
+ if (svm->sev_es.snp_ap_runnable) {
+ kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE);
+ } else {
+ kvm_set_mp_state(vcpu, KVM_MP_STATE_UNINITIALIZED);
+ }
/*
* gmem pages aren't currently migratable, but if this ever changes
@@ -4162,36 +4186,87 @@ static void sev_snp_init_protected_guest_state(struct kvm_vcpu *vcpu)
kvm_release_page_clean(page);
}
-static int sev_snp_ap_creation(struct vcpu_svm *svm)
+/*
+ * AP Creation request layout in SW_EXITINFO1: request type in the lower 32
+ * bits (target VMPL in bits 19:16), target APIC ID in the upper 32 bits.
+ */
+static unsigned int get_ap_creation_request(struct vcpu_svm *svm)
{
- struct kvm_sev_info_plane *sev_plane = to_kvm_sev_info_plane(svm->vcpu.plane);
- struct kvm_vcpu *vcpu = &svm->vcpu;
- struct kvm_vcpu *target_vcpu;
- struct vcpu_svm *target_svm;
- unsigned int request;
+ unsigned int req = lower_32_bits(svm->vmcb->control.exit_info_1);
+
+ return req & ~SVM_VMGEXIT_AP_VMPL_MASK;
+}
+
+static unsigned int get_ap_creation_vmpl(struct vcpu_svm *svm)
+{
+ unsigned int req = lower_32_bits(svm->vmcb->control.exit_info_1);
+
+ return (req & SVM_VMGEXIT_AP_VMPL_MASK) >> SVM_VMGEXIT_AP_VMPL_SHIFT;
+}
+
+static unsigned int get_ap_creation_apic_id(struct vcpu_svm *svm)
+{
+ return upper_32_bits(svm->vmcb->control.exit_info_1);
+}
+
+static int sev_snp_ap_creation(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *target_svm = NULL, *svm = to_svm(vcpu);
+ struct kvm_sev_info_plane *target_sev_plane = NULL;
+ struct kvm_plane *target_plane = NULL;
+ struct kvm_vcpu *target_vcpu = NULL;
unsigned int apic_id;
+ unsigned int request;
+ unsigned int vmpl;
- request = lower_32_bits(svm->vmcb->control.exit_info_1);
- apic_id = upper_32_bits(svm->vmcb->control.exit_info_1);
+ request = get_ap_creation_request(svm);
+ apic_id = get_ap_creation_apic_id(svm);
+ vmpl = get_ap_creation_vmpl(svm);
- /* Validate the APIC ID */
- target_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, apic_id);
- if (!target_vcpu) {
- vcpu_unimpl(vcpu, "vmgexit: invalid AP APIC ID [%#x] from guest\n",
- apic_id);
+ /* Validate the requested VMPL level */
+ if (vmpl >= SVM_SEV_VMPL_MAX) {
+ vcpu_unimpl(vcpu, "vmgexit: invalid VMPL level [%u] from guest\n",
+ vmpl);
return -EINVAL;
}
+ vmpl = array_index_nospec(vmpl, SVM_SEV_VMPL_MAX);
+
+ /* Obtain the target plane and vCPU */
+ target_plane = vcpu->kvm->planes[vmpl];
+ if (target_plane) {
+ target_vcpu = plane_get_vcpu_by_id(target_plane, apic_id);
+ } else {
+ target_vcpu = NULL;
+ }
+
+ /* Request user-space to create target plane VCPU if it does not exist */
+ if (!target_plane || !target_vcpu) {
+ vcpu->arch.complete_userspace_io = sev_snp_ap_creation;
+ return kvm_request_create_plane(vcpu, vmpl, apic_id);
+ }
target_svm = to_svm(target_vcpu);
+ target_sev_plane = &to_kvm_svm_plane(target_svm->vcpu.plane)->sev_info_plane;
guard(mutex)(&target_svm->sev_es.snp_vmsa_mutex);
+ /* VMPL0 can only be replaced by another vCPU running VMPL0 */
+ if (vmpl == SVM_SEV_VMPL0 &&
+ (vcpu == target_vcpu || vcpu->plane_level != SVM_SEV_VMPL0)) {
+ vcpu_unimpl(vcpu, "vmgexit: VMPL0 AP action not allowed\n");
+ return -EINVAL;
+ }
+
switch (request) {
case SVM_VMGEXIT_AP_CREATE_ON_INIT:
case SVM_VMGEXIT_AP_CREATE:
- if (vcpu->arch.regs[VCPU_REGS_RAX] != sev_plane->vmsa_features) {
+ /* Initialize the target plane's SEV features if necessary */
+ if (target_sev_plane->vmsa_features == 0)
+ target_sev_plane->vmsa_features = vcpu->arch.regs[VCPU_REGS_RAX];
+
+ if (vcpu->arch.regs[VCPU_REGS_RAX] != target_sev_plane->vmsa_features) {
vcpu_unimpl(vcpu, "vmgexit: mismatched AP sev_features [%#lx] != [%#llx] from guest\n",
- vcpu->arch.regs[VCPU_REGS_RAX], sev_plane->vmsa_features);
+ vcpu->arch.regs[VCPU_REGS_RAX], target_sev_plane->vmsa_features);
return -EINVAL;
}
@@ -4226,16 +4301,18 @@ static int sev_snp_ap_creation(struct vcpu_svm *svm)
return -EINVAL;
}
+ /* Signal the vCPU to update its state */
+ kvm_make_request(KVM_REQ_UPDATE_PROTECTED_GUEST_STATE, target_vcpu);
+
target_svm->sev_es.snp_ap_waiting_for_reset = true;
+ target_svm->sev_es.snp_ap_runnable = (request == SVM_VMGEXIT_AP_CREATE);
- /*
- * Unless Creation is deferred until INIT, signal the vCPU to update
- * its state.
- */
- if (request != SVM_VMGEXIT_AP_CREATE_ON_INIT)
- kvm_make_request_and_kick(KVM_REQ_UPDATE_PROTECTED_GUEST_STATE, target_vcpu);
+ if (request == SVM_VMGEXIT_AP_CREATE)
+ kvm_make_request(KVM_REQ_PLANE_RESCHED, target_vcpu);
- return 0;
+ kvm_vcpu_kick(target_vcpu);
+
+ return 1;
}
static int snp_handle_guest_req(struct vcpu_svm *svm, gpa_t req_gpa, gpa_t resp_gpa)
@@ -4779,12 +4856,11 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
ret = snp_begin_psc(svm);
break;
case SVM_VMGEXIT_AP_CREATION:
- ret = sev_snp_ap_creation(svm);
- if (ret) {
+ ret = sev_snp_ap_creation(vcpu);
+ if (ret < 0) {
svm_vmgexit_bad_input(svm, GHCB_ERR_INVALID_INPUT);
+ ret = 1;
}
-
- ret = 1;
break;
case SVM_VMGEXIT_GUEST_REQUEST:
ret = snp_handle_guest_req(svm, control->exit_info_1, control->exit_info_2);
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 57033922ddcf..7e860f2abafb 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -282,6 +282,7 @@ struct vcpu_sev_es_state {
struct mutex snp_vmsa_mutex; /* Used to handle concurrent updates of VMSA. */
gpa_t snp_vmsa_gpa;
+ bool snp_ap_runnable;
bool snp_ap_waiting_for_reset;
bool snp_has_guest_vmsa;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0b9fa1059481..ad05350bb393 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -12165,6 +12165,15 @@ static int __kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
kvm_vcpu_block(vcpu);
kvm_vcpu_srcu_read_lock(vcpu);
+ /*
+ * It is possible that the vCPU has never run before. If the
+ * request is to update the protected guest state (AP Create),
+ * then ensure that the vCPU can now run.
+ */
+ if (kvm_test_request(KVM_REQ_UPDATE_PROTECTED_GUEST_STATE, vcpu) &&
+ vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)
+ vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+
if (kvm_apic_accept_events(vcpu) < 0) {
r = 0;
goto out;
--
2.53.0
^ permalink raw reply related
* [PATCH 33/60] KVM: Implement KVM_CREATE_VCPU ioctl for planes
From: Jörg Rödel @ 2026-06-08 14:42 UTC (permalink / raw)
To: Paolo Bonzini, Sean Christopherson
Cc: Tom Lendacky, ashish.kalra, michael.roth, nsaenz, anelkz,
James.Bottomley, Melody Wang, kvm, linux-kernel, kvmarm,
loongarch, linux-mips, linuxppc-dev, kvm-riscv, x86, coconut-svsm,
joerg.roedel, Carlos López
In-Reply-To: <20260608144252.351443-1-joro@8bytes.org>
From: Joerg Roedel <joerg.roedel@amd.com>
Implement the KVM_CREATE_VCPU ioctl per plane. Also introduce an
empty IOCTL path for the plane-vcpus, including per-architecture
call-backs.
Co-developed-by: Carlos López <clopez@suse.de>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
arch/arm64/kvm/arm.c | 5 ++
arch/loongarch/kvm/vcpu.c | 5 ++
arch/mips/kvm/mips.c | 5 ++
arch/powerpc/kvm/powerpc.c | 5 ++
arch/riscv/kvm/vcpu.c | 5 ++
arch/s390/kvm/kvm-s390.c | 5 ++
arch/x86/kvm/x86.c | 29 ++++++++++++
include/linux/kvm_host.h | 12 +++--
virt/kvm/kvm_main.c | 97 ++++++++++++++++++++++++++++----------
9 files changed, 141 insertions(+), 27 deletions(-)
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index d7a4b9b239dc..b2bfea5df7e0 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -1824,6 +1824,11 @@ static int kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
return __kvm_arm_vcpu_set_events(vcpu, events);
}
+bool kvm_arch_is_vcpu_plane_ioctl(unsigned ioctl)
+{
+ return false;
+}
+
long kvm_arch_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
index bab3c66ae58d..0b66b8186923 100644
--- a/arch/loongarch/kvm/vcpu.c
+++ b/arch/loongarch/kvm/vcpu.c
@@ -1232,6 +1232,11 @@ static int kvm_loongarch_vcpu_set_attr(struct kvm_vcpu *vcpu,
return ret;
}
+bool kvm_arch_is_vcpu_plane_ioctl(unsigned ioctl)
+{
+ return false;
+}
+
long kvm_arch_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index e22d2a267e03..28795bad178b 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -933,6 +933,11 @@ long kvm_arch_vcpu_unlocked_ioctl(struct file *filp, unsigned int ioctl,
return -ENOIOCTLCMD;
}
+bool kvm_arch_is_vcpu_plane_ioctl(unsigned ioctl)
+{
+ return false;
+}
+
long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl,
unsigned long arg)
{
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 35658cded0cb..476f7ea02c79 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -2055,6 +2055,11 @@ long kvm_arch_vcpu_unlocked_ioctl(struct file *filp, unsigned int ioctl,
return -ENOIOCTLCMD;
}
+bool kvm_arch_is_vcpu_plane_ioctl(unsigned ioctl)
+{
+ return false;
+}
+
long kvm_arch_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index 66cde226eb87..17680b659bdd 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -263,6 +263,11 @@ long kvm_arch_vcpu_unlocked_ioctl(struct file *filp, unsigned int ioctl,
return -ENOIOCTLCMD;
}
+bool kvm_arch_is_vcpu_plane_ioctl(unsigned ioctl)
+{
+ return false;
+}
+
long kvm_arch_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 94c40b2aa759..261859cb1bb6 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -5454,6 +5454,11 @@ static int kvm_s390_handle_pv_vcpu_dump(struct kvm_vcpu *vcpu,
return ret;
}
+bool kvm_arch_is_vcpu_plane_ioctl(unsigned ioctl)
+{
+ return false;
+}
+
long kvm_arch_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d6bf0425525c..623838885753 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6227,6 +6227,35 @@ static int kvm_get_reg_list(struct kvm_vcpu *vcpu,
return 0;
}
+bool kvm_arch_is_vcpu_plane_ioctl(unsigned ioctl)
+{
+ switch (ioctl) {
+ case KVM_GET_DEBUGREGS:
+ case KVM_SET_DEBUGREGS:
+ case KVM_GET_LAPIC:
+ case KVM_SET_LAPIC:
+ case KVM_GET_MSRS:
+ case KVM_SET_MSRS:
+ case KVM_GET_NESTED_STATE:
+ case KVM_SET_NESTED_STATE:
+ case KVM_GET_ONE_REG:
+ case KVM_SET_ONE_REG:
+ case KVM_GET_SREGS2:
+ case KVM_SET_SREGS2:
+ case KVM_GET_VCPU_EVENTS:
+ case KVM_SET_VCPU_EVENTS:
+ case KVM_GET_XCRS:
+ case KVM_SET_XCRS:
+ case KVM_GET_XSAVE:
+ case KVM_GET_XSAVE2:
+ case KVM_SET_XSAVE:
+ case KVM_GET_REG_LIST:
+ return true;
+ default:
+ return false;
+ }
+}
+
long kvm_arch_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 385e1ee8fd3a..b8c3f8f11cb4 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1126,7 +1126,7 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
#define kvm_for_each_vcpu(idx, vcpup, kvm) \
plane_for_each_vcpu(idx, vcpup, kvm->planes[0])
-static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id)
+static inline struct kvm_vcpu *plane_get_vcpu_by_id(struct kvm_plane *plane, int id)
{
struct kvm_vcpu *vcpu = NULL;
unsigned long i;
@@ -1134,15 +1134,20 @@ static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id)
if (id < 0)
return NULL;
if (id < KVM_MAX_VCPUS)
- vcpu = kvm_get_vcpu(kvm, id);
+ vcpu = plane_get_vcpu(plane, id);
if (vcpu && vcpu->vcpu_id == id)
return vcpu;
- kvm_for_each_vcpu(i, vcpu, kvm)
+ plane_for_each_vcpu(i, vcpu, plane)
if (vcpu->vcpu_id == id)
return vcpu;
return NULL;
}
+static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id)
+{
+ return plane_get_vcpu_by_id(kvm->planes[0], id);
+}
+
static inline bool kvm_is_vcpu_creation_in_progress(struct kvm *kvm)
{
lockdep_assert_held(&kvm->lock);
@@ -1688,6 +1693,7 @@ bool kvm_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range);
long kvm_arch_dev_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg);
+bool kvm_arch_is_vcpu_plane_ioctl(unsigned ioctl);
long kvm_arch_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg);
long kvm_arch_vcpu_unlocked_ioctl(struct file *filp,
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 2d0d5f4fd356..8839f91fd15e 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -538,14 +538,11 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
{
vcpu->cpu = -1;
vcpu->kvm = kvm;
- vcpu->plane = kvm->planes[0];
vcpu->vcpu_id = id;
kvm_async_pf_vcpu_init(vcpu);
vcpu->last_used_slot = NULL;
- vcpu->plane_level = 0;
-
/* Fill the stats id string for the vcpu */
snprintf(vcpu->stats_id, sizeof(vcpu->stats_id), "kvm-%d/vcpu-%d",
task_pid_nr(current), id);
@@ -4306,9 +4303,13 @@ static struct file_operations kvm_vcpu_fops = {
*/
static int create_vcpu_fd(struct kvm_vcpu *vcpu)
{
- char name[8 + 1 + ITOA_MAX_LEN + 1];
+ char name[14 + 1 + (2 * ITOA_MAX_LEN) + 1];
+
+ if (vcpu->plane_level == 0)
+ snprintf(name, sizeof(name), "kvm-vcpu:%d", vcpu->vcpu_id);
+ else
+ snprintf(name, sizeof(name), "kvm-vcpu-plane%d:%d", vcpu->plane_level, vcpu->vcpu_id);
- snprintf(name, sizeof(name), "kvm-vcpu:%d", vcpu->vcpu_id);
return anon_inode_getfd(name, &kvm_vcpu_fops, vcpu, O_RDWR | O_CLOEXEC);
}
@@ -4327,13 +4328,17 @@ DEFINE_SIMPLE_ATTRIBUTE(vcpu_get_pid_fops, vcpu_get_pid, NULL, "%llu\n");
static void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
{
+ char dir_name[10 + (2 * ITOA_MAX_LEN) + 1];
struct dentry *debugfs_dentry;
- char dir_name[ITOA_MAX_LEN * 2];
if (!debugfs_initialized())
return;
- snprintf(dir_name, sizeof(dir_name), "vcpu%d", vcpu->vcpu_id);
+ if (vcpu->plane_level == 0)
+ snprintf(dir_name, sizeof(dir_name), "vcpu%d", vcpu->vcpu_id);
+ else
+ snprintf(dir_name, sizeof(dir_name), "vcpu%d-plane%d", vcpu->vcpu_id, vcpu->plane_level);
+
debugfs_dentry = debugfs_create_dir(dir_name,
vcpu->kvm->debugfs_dentry);
debugfs_create_file("pid", 0444, debugfs_dentry, vcpu,
@@ -4346,10 +4351,11 @@ static void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
/*
* Creates some virtual cpus. Good luck creating more than one.
*/
-static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id)
+static int kvm_plane_ioctl_create_vcpu(struct kvm_plane *plane, unsigned long id)
{
- int r = -EINVAL;
+ struct kvm *kvm = plane->kvm;
struct kvm_vcpu *vcpu;
+ int r;
mutex_lock(&kvm->lock);
if (kvm->created_vcpus >= kvm->max_vcpus) {
@@ -4366,11 +4372,28 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id)
if (!vcpu)
return -ENOMEM;
- r = kvm_vcpu_init_common(vcpu, kvm, id);
- if (r)
+ r = -EEXIST;
+ if (plane_get_vcpu_by_id(plane, id))
goto vcpu_free;
+ if (plane->level > 0) {
+ struct kvm_vcpu *vcpu_plane0 = kvm_get_vcpu_by_id(kvm, id);
+
+ /* Plane0 VCPU must exist before creating non-plane0 VCPUs */
+ r = -EINVAL;
+ if (vcpu_plane0 == NULL)
+ goto vcpu_free;
+
+ vcpu->common = vcpu_plane0->common;
+ } else {
+ r = kvm_vcpu_init_common(vcpu, kvm, id);
+ if (r)
+ goto vcpu_free;
+ }
+
vcpu->vcpu_idx = vcpu->common->vcpu_idx;
+ vcpu->plane = plane;
+ vcpu->plane_level = plane->level;
vcpu->run = vcpu->common->run;
kvm_vcpu_init(vcpu, kvm, id);
@@ -4381,12 +4404,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id)
mutex_lock(&kvm->lock);
- if (kvm_get_vcpu_by_id(kvm, id)) {
- r = -EEXIST;
- goto unlock_vcpu_destroy;
- }
-
- r = xa_insert(&kvm->planes[0]->vcpu_array, vcpu->vcpu_idx, vcpu, GFP_KERNEL_ACCOUNT);
+ r = xa_insert(&plane->vcpu_array, vcpu->vcpu_idx, vcpu, GFP_KERNEL_ACCOUNT);
WARN_ON_ONCE(r == -EBUSY);
if (r)
goto unlock_vcpu_destroy;
@@ -4416,7 +4434,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id)
kvm_put_xa_erase:
kvm_vcpu_unlock(vcpu);
kvm_put_kvm_no_destroy(kvm);
- xa_erase(&kvm->planes[0]->vcpu_array, vcpu->vcpu_idx);
+ xa_erase(&plane->vcpu_array, vcpu->vcpu_idx);
unlock_vcpu_destroy:
mutex_unlock(&kvm->lock);
kvm_arch_vcpu_destroy(vcpu);
@@ -4550,7 +4568,7 @@ static int kvm_wait_for_vcpu_online(struct kvm_vcpu *vcpu)
/*
* Acquire and release the vCPU's mutex to wait for vCPU creation to
- * complete (kvm_vm_ioctl_create_vcpu() holds the mutex until the vCPU
+ * complete (kvm_plane_ioctl_create_vcpu() holds the mutex until the vCPU
* is fully online).
*/
if (mutex_lock_killable(kvm_vcpu_mutex(vcpu)))
@@ -4564,6 +4582,22 @@ static int kvm_wait_for_vcpu_online(struct kvm_vcpu *vcpu)
return 0;
}
+static inline bool kvm_is_vcpu_plane_ioctl(unsigned ioctl)
+{
+ switch (ioctl) {
+ case KVM_GET_FPU:
+ case KVM_SET_FPU:
+ case KVM_GET_REGS:
+ case KVM_SET_REGS:
+ case KVM_GET_SREGS:
+ case KVM_SET_SREGS:
+ case KVM_TRANSLATE:
+ return true;
+ default:
+ return kvm_arch_is_vcpu_plane_ioctl(ioctl);
+ }
+}
+
static long kvm_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -4576,6 +4610,9 @@ static long kvm_vcpu_ioctl(struct file *filp,
if (vcpu->kvm->mm != current->mm || vcpu->kvm->vm_dead)
return -EIO;
+ if (vcpu->plane_level > 0 && !kvm_is_vcpu_plane_ioctl(ioctl))
+ return -EINVAL;
+
if (unlikely(_IOC_TYPE(ioctl) != KVMIO))
return -EINVAL;
@@ -4858,6 +4895,21 @@ static long kvm_vcpu_compat_ioctl(struct file *filp,
}
#endif
+static long __kvm_plane_ioctl(struct kvm_plane *plane, unsigned int ioctl, unsigned long arg)
+{
+ long r;
+
+ switch (ioctl) {
+ case KVM_CREATE_VCPU:
+ r = kvm_plane_ioctl_create_vcpu(plane, arg);
+ break;
+ default:
+ r = -ENOTTY;
+ }
+
+ return r;
+}
+
static long kvm_plane_ioctl(struct file *filp, unsigned int ioctl,
unsigned long arg)
{
@@ -4866,10 +4918,7 @@ static long kvm_plane_ioctl(struct file *filp, unsigned int ioctl,
if (plane->kvm->mm != current->mm || plane->kvm->vm_dead)
return -EIO;
- switch (ioctl) {
- default:
- return -ENOTTY;
- }
+ return __kvm_plane_ioctl(plane, ioctl, arg);
}
static int kvm_plane_release(struct inode *inode, struct file *filp)
@@ -5396,7 +5445,7 @@ static long kvm_vm_ioctl(struct file *filp,
r = kvm_vm_ioctl_create_plane(kvm, arg);
break;
case KVM_CREATE_VCPU:
- r = kvm_vm_ioctl_create_vcpu(kvm, arg);
+ r = __kvm_plane_ioctl(kvm->planes[0], ioctl, arg);
break;
case KVM_ENABLE_CAP: {
struct kvm_enable_cap cap;
--
2.53.0
^ permalink raw reply related
* [PATCH 15/60] kvm: Move VCPU scheduling state to struct kvm_vcpu_common
From: Jörg Rödel @ 2026-06-08 14:42 UTC (permalink / raw)
To: Paolo Bonzini, Sean Christopherson
Cc: Tom Lendacky, ashish.kalra, michael.roth, nsaenz, anelkz,
James.Bottomley, Melody Wang, kvm, linux-kernel, kvmarm,
loongarch, linux-mips, linuxppc-dev, kvm-riscv, x86, coconut-svsm,
joerg.roedel
In-Reply-To: <20260608144252.351443-1-joro@8bytes.org>
From: Joerg Roedel <joerg.roedel@amd.com>
The scheduling state of the KVM VCPU is shared between all per-plane
VCPU objects. Move it to struct kvm_vcpu_common.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
arch/x86/kvm/svm/svm.c | 2 +-
include/linux/kvm_host.h | 24 ++++++++++----------
virt/kvm/kvm_main.c | 47 +++++++++++++++++++++-------------------
3 files changed, 39 insertions(+), 34 deletions(-)
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 1524c1bb4f37..f5cc30a6732f 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -229,7 +229,7 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
* and only if the vCPU is actively running, e.g. to
* avoid positives if userspace is stuffing state.
*/
- if (is_guest_mode(vcpu) && vcpu->wants_to_run)
+ if (is_guest_mode(vcpu) && vcpu->common->wants_to_run)
kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
svm_leave_nested(vcpu);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index d54f299218a4..a6aacd507c02 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -329,15 +329,21 @@ struct kvm_vcpu_common {
/* Currently active VCPU */
struct kvm_vcpu *current_vcpu;
+
+ /* Scheduling state */
+#ifdef CONFIG_PREEMPT_NOTIFIERS
+ struct preempt_notifier preempt_notifier;
+#endif
+ bool wants_to_run;
+ bool preempted;
+ bool ready;
+ bool scheduled_out;
};
struct kvm_vcpu {
struct kvm *kvm;
struct kvm_plane *plane;
-#ifdef CONFIG_PREEMPT_NOTIFIERS
- struct preempt_notifier preempt_notifier;
-#endif
int cpu;
int vcpu_id; /* id given by userspace at creation */
int vcpu_idx; /* index into kvm->planes[]->vcpu_array */
@@ -392,10 +398,6 @@ struct kvm_vcpu {
bool dy_eligible;
} spin_loop;
#endif
- bool wants_to_run;
- bool preempted;
- bool ready;
- bool scheduled_out;
struct kvm_vcpu_arch arch;
struct kvm_vcpu_stat stat;
char stats_id[KVM_STATS_NAME_SIZE];
@@ -416,22 +418,22 @@ struct kvm_vcpu {
static inline bool kvm_vcpu_wants_to_run(struct kvm_vcpu *vcpu)
{
- return vcpu->wants_to_run;
+ return vcpu->common->wants_to_run;
}
static inline bool kvm_vcpu_preempted(struct kvm_vcpu *vcpu)
{
- return READ_ONCE(vcpu->preempted);
+ return READ_ONCE(vcpu->common->preempted);
}
static inline bool kvm_vcpu_ready(struct kvm_vcpu *vcpu)
{
- return READ_ONCE(vcpu->ready);
+ return READ_ONCE(vcpu->common->ready);
}
static inline bool kvm_vcpu_scheduled_out(struct kvm_vcpu *vcpu)
{
- return vcpu->scheduled_out;
+ return vcpu->common->scheduled_out;
}
/*
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 9c07321e30f4..a44f8dc8418a 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -166,7 +166,7 @@ void vcpu_load(struct kvm_vcpu *vcpu)
int cpu = get_cpu();
__this_cpu_write(kvm_running_vcpu, vcpu->common);
- preempt_notifier_register(&vcpu->preempt_notifier);
+ preempt_notifier_register(&vcpu->common->preempt_notifier);
kvm_arch_vcpu_load(vcpu, cpu);
put_cpu();
}
@@ -176,7 +176,7 @@ void vcpu_put(struct kvm_vcpu *vcpu)
{
preempt_disable();
kvm_arch_vcpu_put(vcpu);
- preempt_notifier_unregister(&vcpu->preempt_notifier);
+ preempt_notifier_unregister(&vcpu->common->preempt_notifier);
__this_cpu_write(kvm_running_vcpu, NULL);
preempt_enable();
}
@@ -468,6 +468,12 @@ static int kvm_vcpu_init_common(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned
common->kvm = kvm;
common->current_vcpu = vcpu;
+
+ common->wants_to_run = false;
+ common->preempted = false;
+ common->ready = false;
+ preempt_notifier_init(&common->preempt_notifier, &kvm_preempt_ops);
+
vcpu->common = no_free_ptr(common);
return 0;
@@ -508,9 +514,6 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
kvm_vcpu_set_in_spin_loop(vcpu, false);
kvm_vcpu_set_dy_eligible(vcpu, false);
- vcpu->preempted = false;
- vcpu->ready = false;
- preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops);
vcpu->last_used_slot = NULL;
vcpu->plane_level = 0;
@@ -3927,7 +3930,7 @@ EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_halt);
bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu)
{
if (__kvm_vcpu_wake_up(vcpu)) {
- WRITE_ONCE(vcpu->ready, true);
+ WRITE_ONCE(vcpu->common->ready, true);
++vcpu->stat.generic.halt_wakeup;
return true;
}
@@ -4580,9 +4583,9 @@ static long kvm_vcpu_ioctl(struct file *filp,
put_pid(oldpid);
}
- vcpu->wants_to_run = !READ_ONCE(vcpu->run->immediate_exit__unsafe);
+ vcpu->common->wants_to_run = !READ_ONCE(vcpu->run->immediate_exit__unsafe);
r = kvm_arch_vcpu_ioctl_run(vcpu);
- vcpu->wants_to_run = false;
+ vcpu->common->wants_to_run = false;
/*
* FIXME: Remove this hack once all KVM architectures
@@ -6488,36 +6491,36 @@ static void kvm_init_debug(void)
}
static inline
-struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn)
+struct kvm_vcpu_common *preempt_notifier_to_vcpu_common(struct preempt_notifier *pn)
{
- return container_of(pn, struct kvm_vcpu, preempt_notifier);
+ return container_of(pn, struct kvm_vcpu_common, preempt_notifier);
}
static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
{
- struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
+ struct kvm_vcpu_common *common = preempt_notifier_to_vcpu_common(pn);
- WRITE_ONCE(vcpu->preempted, false);
- WRITE_ONCE(vcpu->ready, false);
+ WRITE_ONCE(common->preempted, false);
+ WRITE_ONCE(common->ready, false);
- __this_cpu_write(kvm_running_vcpu, vcpu->common);
- kvm_arch_vcpu_load(vcpu, cpu);
+ __this_cpu_write(kvm_running_vcpu, common);
+ kvm_arch_vcpu_load(common->current_vcpu, cpu);
- WRITE_ONCE(vcpu->scheduled_out, false);
+ WRITE_ONCE(common->scheduled_out, false);
}
static void kvm_sched_out(struct preempt_notifier *pn,
struct task_struct *next)
{
- struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
+ struct kvm_vcpu_common *common = preempt_notifier_to_vcpu_common(pn);
- WRITE_ONCE(vcpu->scheduled_out, true);
+ WRITE_ONCE(common->scheduled_out, true);
- if (task_is_runnable(current) && kvm_vcpu_wants_to_run(vcpu)) {
- WRITE_ONCE(vcpu->preempted, true);
- WRITE_ONCE(vcpu->ready, true);
+ if (task_is_runnable(current) && common->wants_to_run) {
+ WRITE_ONCE(common->preempted, true);
+ WRITE_ONCE(common->ready, true);
}
- kvm_arch_vcpu_put(vcpu);
+ kvm_arch_vcpu_put(common->current_vcpu);
__this_cpu_write(kvm_running_vcpu, NULL);
}
--
2.53.0
^ permalink raw reply related
* [PATCH 43/60] kvm: x86: Move CPUID state to struct kvm_vcpu_arch_common
From: Jörg Rödel @ 2026-06-08 14:42 UTC (permalink / raw)
To: Paolo Bonzini, Sean Christopherson
Cc: Tom Lendacky, ashish.kalra, michael.roth, nsaenz, anelkz,
James.Bottomley, Melody Wang, kvm, linux-kernel, kvmarm,
loongarch, linux-mips, linuxppc-dev, kvm-riscv, x86, coconut-svsm,
joerg.roedel
In-Reply-To: <20260608144252.351443-1-joro@8bytes.org>
From: Joerg Roedel <joerg.roedel@amd.com>
The CPUID state is shared across all planes, so move it to struct
kvm_vcpu_arch_common.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
arch/x86/include/asm/kvm_host.h | 17 ++++++++--------
arch/x86/kvm/cpuid.c | 36 +++++++++++++++++++--------------
arch/x86/kvm/cpuid.h | 14 ++++++++++---
arch/x86/kvm/lapic.c | 2 +-
arch/x86/kvm/smm.c | 2 +-
arch/x86/kvm/svm/svm.c | 2 +-
arch/x86/kvm/vmx/vmx.c | 2 +-
arch/x86/kvm/x86.c | 17 ++++++++++++----
8 files changed, 58 insertions(+), 34 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 11e52f8bb2c2..3a64bdae6e23 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -794,10 +794,16 @@ enum kvm_only_cpuid_leafs {
NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS,
};
-struct kvm_vcpu_arch_common {};
+struct kvm_vcpu_arch_common {
+ /* CPUID related state */
+ int cpuid_nent;
+ struct kvm_cpuid_entry2 *cpuid_entries;
+ bool cpuid_dynamic_bits_dirty;
+ bool is_amd_compatible;
+};
-static inline int kvm_arch_vcpu_common_init(struct kvm_vcpu_common *common) { return 0; }
-static inline void kvm_arch_vcpu_common_destroy(struct kvm_vcpu_common *common) {}
+int kvm_arch_vcpu_common_init(struct kvm_vcpu_common *common);
+void kvm_arch_vcpu_common_destroy(struct kvm_vcpu_common *common);
struct kvm_vcpu_arch {
/*
@@ -919,11 +925,6 @@ struct kvm_vcpu_arch {
int halt_request; /* real mode on Intel only */
- int cpuid_nent;
- struct kvm_cpuid_entry2 *cpuid_entries;
- bool cpuid_dynamic_bits_dirty;
- bool is_amd_compatible;
-
/*
* cpu_caps holds the effective guest capabilities, i.e. the features
* the vCPU is allowed to use. Typically, but not always, features can
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index e69156b54cff..6d948d63306c 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -176,6 +176,7 @@ static void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu);
static int kvm_cpuid_check_equal(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
int nent)
{
+ struct kvm_vcpu_common *common = vcpu->common;
struct kvm_cpuid_entry2 *orig;
int i;
@@ -188,11 +189,11 @@ static int kvm_cpuid_check_equal(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2
kvm_update_cpuid_runtime(vcpu);
kvm_apply_cpuid_pv_features_quirk(vcpu);
- if (nent != vcpu->arch.cpuid_nent)
+ if (nent != common->arch.cpuid_nent)
return -EINVAL;
for (i = 0; i < nent; i++) {
- orig = &vcpu->arch.cpuid_entries[i];
+ orig = &common->arch.cpuid_entries[i];
if (e2[i].function != orig->function ||
e2[i].index != orig->index ||
e2[i].flags != orig->flags ||
@@ -290,7 +291,7 @@ static void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
- vcpu->arch.cpuid_dynamic_bits_dirty = false;
+ vcpu->common->arch.cpuid_dynamic_bits_dirty = false;
best = kvm_find_cpuid_entry(vcpu, 1);
if (best) {
@@ -374,6 +375,7 @@ static int cpuid_func_emulated(struct kvm_cpuid_entry2 *entry, u32 func,
void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
{
+ struct kvm_vcpu_common *common = vcpu->common;
struct kvm_lapic *apic = vcpu->arch.apic;
struct kvm_cpuid_entry2 *best;
struct kvm_cpuid_entry2 *entry;
@@ -443,7 +445,7 @@ void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
vcpu->arch.pv_cpuid.features = kvm_apply_cpuid_pv_features_quirk(vcpu);
- vcpu->arch.is_amd_compatible = guest_cpuid_is_amd_or_hygon(vcpu);
+ common->arch.is_amd_compatible = guest_cpuid_is_amd_or_hygon(vcpu);
vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
vcpu->arch.reserved_gpa_bits = kvm_vcpu_reserved_gpa_bits_raw(vcpu);
@@ -509,6 +511,7 @@ u64 kvm_vcpu_reserved_gpa_bits_raw(struct kvm_vcpu *vcpu)
static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
int nent)
{
+ struct kvm_vcpu_common *common = vcpu->common;
u32 vcpu_caps[NR_KVM_CPU_CAPS];
int r;
@@ -516,7 +519,7 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
* Apply pending runtime CPUID updates to the current CPUID entries to
* avoid false positives due to mismatches on KVM-owned feature flags.
*/
- if (vcpu->arch.cpuid_dynamic_bits_dirty)
+ if (common->arch.cpuid_dynamic_bits_dirty)
kvm_update_cpuid_runtime(vcpu);
/*
@@ -530,8 +533,8 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
* updates. Full initialization is done if and only if the vCPU hasn't
* run, i.e. only if userspace is potentially changing CPUID features.
*/
- swap(vcpu->arch.cpuid_entries, e2);
- swap(vcpu->arch.cpuid_nent, nent);
+ swap(common->arch.cpuid_entries, e2);
+ swap(common->arch.cpuid_nent, nent);
memcpy(vcpu_caps, vcpu->arch.cpu_caps, sizeof(vcpu_caps));
BUILD_BUG_ON(sizeof(vcpu_caps) != sizeof(vcpu->arch.cpu_caps));
@@ -580,8 +583,8 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
err:
memcpy(vcpu->arch.cpu_caps, vcpu_caps, sizeof(vcpu_caps));
- swap(vcpu->arch.cpuid_entries, e2);
- swap(vcpu->arch.cpuid_nent, nent);
+ swap(common->arch.cpuid_entries, e2);
+ swap(common->arch.cpuid_nent, nent);
return r;
}
@@ -658,17 +661,19 @@ int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
struct kvm_cpuid2 *cpuid,
struct kvm_cpuid_entry2 __user *entries)
{
- if (cpuid->nent < vcpu->arch.cpuid_nent)
+ struct kvm_vcpu_common *common = vcpu->common;
+
+ if (cpuid->nent < common->arch.cpuid_nent)
return -E2BIG;
- if (vcpu->arch.cpuid_dynamic_bits_dirty)
+ if (common->arch.cpuid_dynamic_bits_dirty)
kvm_update_cpuid_runtime(vcpu);
- if (copy_to_user(entries, vcpu->arch.cpuid_entries,
- vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
+ if (copy_to_user(entries, common->arch.cpuid_entries,
+ common->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
return -EFAULT;
- cpuid->nent = vcpu->arch.cpuid_nent;
+ cpuid->nent = common->arch.cpuid_nent;
return 0;
}
@@ -2089,10 +2094,11 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
u32 *ecx, u32 *edx, bool exact_only)
{
u32 orig_function = *eax, function = *eax, index = *ecx;
+ struct kvm_vcpu_common *common = vcpu->common;
struct kvm_cpuid_entry2 *entry;
bool exact, used_max_basic = false;
- if (vcpu->arch.cpuid_dynamic_bits_dirty)
+ if (common->arch.cpuid_dynamic_bits_dirty)
kvm_update_cpuid_runtime(vcpu);
entry = kvm_find_cpuid_entry_index(vcpu, function, index);
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 039b8e6f40ba..143ea8531611 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -36,14 +36,18 @@ struct kvm_cpuid_entry2 *kvm_find_cpuid_entry2(struct kvm_cpuid_entry2 *entries,
static inline struct kvm_cpuid_entry2 *kvm_find_cpuid_entry_index(struct kvm_vcpu *vcpu,
u32 function, u32 index)
{
- return kvm_find_cpuid_entry2(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent,
+ struct kvm_vcpu_common *common = vcpu->common;
+
+ return kvm_find_cpuid_entry2(common->arch.cpuid_entries, common->arch.cpuid_nent,
function, index);
}
static inline struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
u32 function)
{
- return kvm_find_cpuid_entry2(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent,
+ struct kvm_vcpu_common *common = vcpu->common;
+
+ return kvm_find_cpuid_entry2(common->arch.cpuid_entries, common->arch.cpuid_nent,
function, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
}
@@ -135,7 +139,7 @@ static __always_inline bool guest_cpuid_has(struct kvm_vcpu *vcpu,
static inline bool guest_cpuid_is_amd_compatible(struct kvm_vcpu *vcpu)
{
- return vcpu->arch.is_amd_compatible;
+ return vcpu->common->arch.is_amd_compatible;
}
static inline bool guest_cpuid_is_intel_compatible(struct kvm_vcpu *vcpu)
@@ -300,4 +304,8 @@ static inline bool guest_has_pred_cmd_msr(struct kvm_vcpu *vcpu)
guest_cpu_cap_has(vcpu, X86_FEATURE_SBPB));
}
+static inline void cpuid_set_dirty(struct kvm_vcpu *vcpu)
+{
+ vcpu->common->arch.cpuid_dynamic_bits_dirty = true;
+}
#endif
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index cac076445472..dc7a08831a54 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2754,7 +2754,7 @@ static void __kvm_apic_set_base(struct kvm_vcpu *vcpu, u64 value)
vcpu->arch.apic_base = value;
if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE)
- vcpu->arch.cpuid_dynamic_bits_dirty = true;
+ cpuid_set_dirty(vcpu);
if (!apic)
return;
diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c
index f623c5986119..736ab345b9fd 100644
--- a/arch/x86/kvm/smm.c
+++ b/arch/x86/kvm/smm.c
@@ -363,7 +363,7 @@ void enter_smm(struct kvm_vcpu *vcpu)
goto error;
#endif
- vcpu->arch.cpuid_dynamic_bits_dirty = true;
+ cpuid_set_dirty(vcpu);
kvm_mmu_reset_context(vcpu);
return;
error:
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index e8ad880a4266..612db7ad8b2a 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1848,7 +1848,7 @@ void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
vmcb_mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
if ((cr4 ^ old_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE))
- vcpu->arch.cpuid_dynamic_bits_dirty = true;
+ cpuid_set_dirty(vcpu);
}
static void svm_set_segment(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 20262855bfe8..62e180651143 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -3595,7 +3595,7 @@ void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
vmcs_writel(GUEST_CR4, hw_cr4);
if ((cr4 ^ old_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE))
- vcpu->arch.cpuid_dynamic_bits_dirty = true;
+ cpuid_set_dirty(vcpu);
}
void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7fc08df245bd..7e94a378b3d2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1322,7 +1322,7 @@ int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
vcpu->arch.xcr0 = xcr0;
if ((xcr0 ^ old_xcr0) & XFEATURE_MASK_EXTEND)
- vcpu->arch.cpuid_dynamic_bits_dirty = true;
+ cpuid_set_dirty(vcpu);
return 0;
}
EXPORT_SYMBOL_FOR_KVM_INTERNAL(__kvm_set_xcr);
@@ -4089,7 +4089,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (!guest_cpu_cap_has(vcpu, X86_FEATURE_XMM3))
return 1;
vcpu->arch.ia32_misc_enable_msr = data;
- vcpu->arch.cpuid_dynamic_bits_dirty = true;
+ cpuid_set_dirty(vcpu);
} else {
vcpu->arch.ia32_misc_enable_msr = data;
}
@@ -4121,7 +4121,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (vcpu->arch.ia32_xss == data)
break;
vcpu->arch.ia32_xss = data;
- vcpu->arch.cpuid_dynamic_bits_dirty = true;
+ cpuid_set_dirty(vcpu);
break;
case MSR_SMI_COUNT:
if (!msr_info->host_initiated)
@@ -13034,7 +13034,16 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
kvm_mmu_destroy(vcpu);
srcu_read_unlock(&vcpu->kvm->srcu, idx);
free_page((unsigned long)vcpu->arch.pio_data);
- kvfree(vcpu->arch.cpuid_entries);
+}
+
+int kvm_arch_vcpu_common_init(struct kvm_vcpu_common *common)
+{
+ return 0;
+}
+
+void kvm_arch_vcpu_common_destroy(struct kvm_vcpu_common *common)
+{
+ kvfree(common->arch.cpuid_entries);
}
static void kvm_xstate_reset(struct kvm_vcpu *vcpu, bool init_event)
--
2.53.0
^ permalink raw reply related
* [PATCH 09/60] kvm: Introduce struct kvm_plane
From: Jörg Rödel @ 2026-06-08 14:42 UTC (permalink / raw)
To: Paolo Bonzini, Sean Christopherson
Cc: Tom Lendacky, ashish.kalra, michael.roth, nsaenz, anelkz,
James.Bottomley, Melody Wang, kvm, linux-kernel, kvmarm,
loongarch, linux-mips, linuxppc-dev, kvm-riscv, x86, coconut-svsm,
joerg.roedel
In-Reply-To: <20260608144252.351443-1-joro@8bytes.org>
From: Paolo Bonzini <pbonzini@redhat.com>
Introduce a data structure to keep VM-wide per-plane state. Initialize
the structure with a back-pointer to struct kvm and the plane level
the structure represents.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Co-developed-by: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
include/linux/kvm_host.h | 7 +++++++
include/uapi/linux/kvm.h | 6 ++++++
virt/kvm/kvm_main.c | 41 ++++++++++++++++++++++++++++++++++++++++
3 files changed, 54 insertions(+)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 4c14aee1fb06..5be4c9f118b4 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -767,6 +767,11 @@ struct kvm_memslots {
int node_idx;
};
+struct kvm_plane {
+ struct kvm *kvm;
+ unsigned level;
+};
+
struct kvm {
#ifdef KVM_HAVE_MMU_RWLOCK
rwlock_t mmu_lock;
@@ -806,6 +811,8 @@ struct kvm {
spinlock_t gpc_lock;
struct list_head gpc_list;
+ struct kvm_plane *planes[KVM_MAX_PLANES];
+
/*
* created_vcpus is protected by kvm->lock, and is incremented
* at the beginning of KVM_CREATE_VCPU. online_vcpus is only
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 6c8afa2047bf..813f964a6dc1 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -730,6 +730,11 @@ struct kvm_enable_cap {
#define KVM_GET_EMULATED_CPUID _IOWR(KVMIO, 0x09, struct kvm_cpuid2)
#define KVM_GET_MSR_FEATURE_INDEX_LIST _IOWR(KVMIO, 0x0a, struct kvm_msr_list)
+/*
+ * Maximum number of supported planes
+ */
+#define KVM_MAX_PLANES 16
+
/*
* Extension capability list.
*/
@@ -996,6 +1001,7 @@ struct kvm_enable_cap {
#define KVM_CAP_S390_USER_OPEREXEC 246
#define KVM_CAP_S390_KEYOP 247
#define KVM_CAP_S390_VSIE_ESAMODE 248
+#define KVM_CAP_PLANES 249
struct kvm_irq_routing_irqchip {
__u32 irqchip;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 881f92d7a469..a68469c6d12e 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1095,6 +1095,38 @@ static inline struct kvm_io_bus *kvm_get_bus_for_destruction(struct kvm *kvm,
static int kvm_enable_virtualization(void);
static void kvm_disable_virtualization(void);
+static struct kvm_plane *kvm_create_plane(struct kvm *kvm, unsigned plane_level)
+{
+ struct kvm_plane *plane = kzalloc(sizeof(*plane), GFP_KERNEL_ACCOUNT);
+
+ if (!plane)
+ return NULL;
+
+ plane->kvm = kvm;
+ plane->level = plane_level;
+
+ kvm->planes[plane_level] = plane;
+
+ return plane;
+}
+
+static void kvm_destroy_one_plane(struct kvm_plane *plane)
+{
+ kfree(plane);
+}
+
+static void kvm_destroy_planes(struct kvm *kvm)
+{
+ int i;
+
+ for (i = 0; i < KVM_MAX_PLANES; ++i) {
+ if (kvm->planes[i] == NULL)
+ continue;
+ kvm_destroy_one_plane(kvm->planes[i]);
+ kvm->planes[i] = NULL;
+ }
+}
+
static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)
{
struct kvm *kvm = kvm_arch_alloc_vm();
@@ -1127,6 +1159,12 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)
BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX);
+ /* Allocate plane 0 and store it in kvm->planes[0] */
+ if (kvm_create_plane(kvm, 0) == NULL) {
+ r = -ENOMEM;
+ goto out_no_planes;
+ }
+
/*
* Force subsequent debugfs file creations to fail if the VM directory
* is not created (by kvm_create_vm_debugfs()).
@@ -1225,6 +1263,8 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)
out_err_no_irq_srcu:
cleanup_srcu_struct(&kvm->srcu);
out_err_no_srcu:
+ kvm_destroy_planes(kvm);
+out_no_planes:
kvm_arch_free_vm(kvm);
mmdrop(current->mm);
return ERR_PTR(r);
@@ -1304,6 +1344,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
xa_destroy(&kvm->mem_attr_array);
#endif
kvm_arch_free_vm(kvm);
+ kvm_destroy_planes(kvm);
preempt_notifier_dec();
kvm_disable_virtualization();
mmdrop(mm);
--
2.53.0
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.