* [PATCH 0/2] KVM: SVM: Fix a NULL VMSA deref with MOVE_ENC_CONTEXT
@ 2025-06-02 22:44 Sean Christopherson
2025-06-02 22:44 ` [PATCH 1/2] KVM: SVM: Reject SEV{-ES} intra host migration if vCPU creation is in-flight Sean Christopherson
` (3 more replies)
0 siblings, 4 replies; 9+ messages in thread
From: Sean Christopherson @ 2025-06-02 22:44 UTC (permalink / raw)
To: Sean Christopherson, Paolo Bonzini
Cc: kvm, linux-kernel, Alexander Potapenko, James Houghton,
Peter Gonda, Tom Lendacky
Fix a NULL VMSA deref bug (which is probably the tip of the iceberg with
respect to what all can go wrong) due to a race between KVM_CREATE_VCPU and
KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM, where a non-SEV-ES vCPU can be created in
an SEV-ES VM.
Found by running syzkaller on a bare metal SEV-ES host. C repro below.
Sean Christopherson (2):
KVM: SVM: Reject SEV{-ES} intra host migration if vCPU creation is
in-flight
KVM: SVM: Initialize vmsa_pa in VMCB to INVALID_PAGE if VMSA page is
NULL
arch/x86/kvm/svm/sev.c | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
base-commit: 0ff41df1cb268fc69e703a08a57ee14ae967d0ca
--
2.49.0.1204.g71687c7c1d-goog
// autogenerated by syzkaller (https://github.com/google/syzkaller)
#define _GNU_SOURCE
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <sys/wait.h>
#include <linux/futex.h>
#include <linux/kvm.h>
static unsigned long long procid;
static void sleep_ms(uint64_t ms)
{
usleep(ms * 1000);
}
static uint64_t current_time_ms(void)
{
struct timespec ts;
if (clock_gettime(CLOCK_MONOTONIC, &ts))
exit(1);
return (uint64_t)ts.tv_sec * 1000 + (uint64_t)ts.tv_nsec / 1000000;
}
static void thread_start(void* (*fn)(void*), void* arg)
{
pthread_t th;
pthread_attr_t attr;
pthread_attr_init(&attr);
pthread_attr_setstacksize(&attr, 128 << 10);
int i = 0;
for (; i < 100; i++) {
if (pthread_create(&th, &attr, fn, arg) == 0) {
pthread_attr_destroy(&attr);
return;
}
if (errno == EAGAIN) {
usleep(50);
continue;
}
break;
}
exit(1);
}
typedef struct {
int state;
} event_t;
static void event_init(event_t* ev)
{
ev->state = 0;
}
static void event_reset(event_t* ev)
{
ev->state = 0;
}
static void event_set(event_t* ev)
{
if (ev->state)
exit(1);
__atomic_store_n(&ev->state, 1, __ATOMIC_RELEASE);
syscall(SYS_futex, &ev->state, FUTEX_WAKE | FUTEX_PRIVATE_FLAG, 1000000);
}
static void event_wait(event_t* ev)
{
while (!__atomic_load_n(&ev->state, __ATOMIC_ACQUIRE))
syscall(SYS_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, 0);
}
static int event_isset(event_t* ev)
{
return __atomic_load_n(&ev->state, __ATOMIC_ACQUIRE);
}
static int event_timedwait(event_t* ev, uint64_t timeout)
{
uint64_t start = current_time_ms();
uint64_t now = start;
for (;;) {
uint64_t remain = timeout - (now - start);
struct timespec ts;
ts.tv_sec = remain / 1000;
ts.tv_nsec = (remain % 1000) * 1000 * 1000;
syscall(SYS_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, &ts);
if (__atomic_load_n(&ev->state, __ATOMIC_ACQUIRE))
return 1;
now = current_time_ms();
if (now - start > timeout)
return 0;
}
}
struct thread_t {
int created, call;
event_t ready, done;
};
static struct thread_t threads[16];
static void execute_call(int call);
static int running;
static void* thr(void* arg)
{
struct thread_t* th = (struct thread_t*)arg;
for (;;) {
event_wait(&th->ready);
event_reset(&th->ready);
execute_call(th->call);
__atomic_fetch_sub(&running, 1, __ATOMIC_RELAXED);
event_set(&th->done);
}
return 0;
}
static void execute_one(void)
{
if (write(1, "executing program\n", sizeof("executing program\n") - 1)) {
}
int i, call, thread;
for (call = 0; call < 9; call++) {
for (thread = 0; thread < (int)(sizeof(threads) / sizeof(threads[0]));
thread++) {
struct thread_t* th = &threads[thread];
if (!th->created) {
th->created = 1;
event_init(&th->ready);
event_init(&th->done);
event_set(&th->done);
thread_start(thr, th);
}
if (!event_isset(&th->done))
continue;
event_reset(&th->done);
th->call = call;
__atomic_fetch_add(&running, 1, __ATOMIC_RELAXED);
event_set(&th->ready);
if (call == 2 || call == 5 || call == 7)
break;
event_timedwait(&th->done, 50);
break;
}
}
for (i = 0; i < 100 && __atomic_load_n(&running, __ATOMIC_RELAXED); i++)
sleep_ms(1);
}
static void loop(void)
{
int iter = 0;
for (; iter < 100; iter++) {
int pid = fork();
if (pid < 0)
exit(1);
if (pid == 0) {
execute_one();
exit(0);
}
int status = 0;
uint64_t start = current_time_ms();
for (;;) {
sleep_ms(10);
if (waitpid(-1, &status, WNOHANG | __WALL) == pid)
break;
if (current_time_ms() - start < 5000)
continue;
break;
}
}
}
uint64_t r[4] = {0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff,
0xffffffffffffffff};
void execute_call(int call)
{
switch (call) {
case 0:
r[1] = syscall(__NR_ioctl, /*fd=*/r[0], /*cmd=*/KVM_CREATE_VM, /*type=*/0ul);
break;
case 3:
r[3] = syscall(__NR_ioctl, /*fd=*/r[0], /*cmd=*/KVM_CREATE_VM, /*type=*/0ul);
break;
case 5:
syscall(__NR_ioctl, /*fd=*/r[3], /*cmd=*/KVM_CREATE_VCPU, /*id=*/0ul);
for (int i = 0; i < 32; i++) {
syscall(__NR_ioctl, /*fd=*/r[3], /*cmd=*/KVM_CREATE_VCPU, /*id=*/0ul);
}
break;
case 6:
*(uint64_t*)0x200000000040 = 1;
*(uint32_t*)0x200000000048 = 8;
*(uint32_t*)0x20000000004c = 0;
*(uint64_t*)0x200000000050 = 0x5625e9b0;
*(uint64_t*)0x200000000058 = 0;
memset((void*)0x200000000060, 0, 16);
syscall(__NR_ioctl, /*fd=*/r[1], /*cmd=*/KVM_MEMORY_ENCRYPT_OP,
/*arg=*/0x200000000040ul);
for (int i = 0; i < 32; i++) {
syscall(__NR_ioctl, /*fd=*/r[1], /*cmd=*/KVM_MEMORY_ENCRYPT_OP,
/*arg=*/0x200000000040ul);
}
break;
case 7:
*(uint32_t*)0x200000000080 = KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM;
*(uint32_t*)0x200000000084 = 0;
*(uint32_t*)0x200000000088 = r[1];
syscall(__NR_ioctl, /*fd=*/r[3], /*cmd=*/KVM_ENABLE_CAP,
/*arg=*/0x200000000080ul);
break;
}
}
int main(void)
{
syscall(__NR_mmap, /*addr=*/0x1ffffffff000ul, /*len=*/0x1000ul, /*prot=*/0ul,
/*flags=MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE*/ 0x32ul,
/*fd=*/(intptr_t)-1, /*offset=*/0ul);
syscall(__NR_mmap, /*addr=*/0x200000000000ul, /*len=*/0x1000000ul,
/*prot=PROT_WRITE|PROT_READ|PROT_EXEC*/ 7ul,
/*flags=MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE*/ 0x32ul,
/*fd=*/(intptr_t)-1, /*offset=*/0ul);
syscall(__NR_mmap, /*addr=*/0x200001000000ul, /*len=*/0x1000ul, /*prot=*/0ul,
/*flags=MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE*/ 0x32ul,
/*fd=*/(intptr_t)-1, /*offset=*/0ul);
for (procid = 0; procid < 10; procid++) {
if (fork() == 0) {
r[0] = open("/dev/kvm", O_RDWR);
loop();
}
}
sleep(1000000);
return 0;
}
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 1/2] KVM: SVM: Reject SEV{-ES} intra host migration if vCPU creation is in-flight
2025-06-02 22:44 [PATCH 0/2] KVM: SVM: Fix a NULL VMSA deref with MOVE_ENC_CONTEXT Sean Christopherson
@ 2025-06-02 22:44 ` Sean Christopherson
2025-06-03 15:21 ` James Houghton
2025-06-04 11:15 ` Liam Merwick
2025-06-02 22:44 ` [PATCH 2/2] KVM: SVM: Initialize vmsa_pa in VMCB to INVALID_PAGE if VMSA page is NULL Sean Christopherson
` (2 subsequent siblings)
3 siblings, 2 replies; 9+ messages in thread
From: Sean Christopherson @ 2025-06-02 22:44 UTC (permalink / raw)
To: Sean Christopherson, Paolo Bonzini
Cc: kvm, linux-kernel, Alexander Potapenko, James Houghton,
Peter Gonda, Tom Lendacky
Reject migration of SEV{-ES} state if either the source or destination VM
is actively creating a vCPU, i.e. if kvm_vm_ioctl_create_vcpu() is in the
section between incrementing created_vcpus and online_vcpus. The bulk of
vCPU creation runs _outside_ of kvm->lock to allow creating multiple vCPUs
in parallel, and so sev_info.es_active can get toggled from false=>true in
the destination VM after (or during) svm_vcpu_create(), resulting in an
SEV{-ES} VM effectively having a non-SEV{-ES} vCPU.
The issue manifests most visibly as a crash when trying to free a vCPU's
NULL VMSA page in an SEV-ES VM, but any number of things can go wrong.
BUG: unable to handle page fault for address: ffffebde00000000
#PF: supervisor read access in kernel mode
#PF: error_code(0x0000) - not-present page
PGD 0 P4D 0
Oops: Oops: 0000 [#1] SMP KASAN NOPTI
CPU: 227 UID: 0 PID: 64063 Comm: syz.5.60023 Tainted: G U O 6.15.0-smp-DEV #2 NONE
Tainted: [U]=USER, [O]=OOT_MODULE
Hardware name: Google, Inc. Arcadia_IT_80/Arcadia_IT_80, BIOS 12.52.0-0 10/28/2024
RIP: 0010:constant_test_bit arch/x86/include/asm/bitops.h:206 [inline]
RIP: 0010:arch_test_bit arch/x86/include/asm/bitops.h:238 [inline]
RIP: 0010:_test_bit include/asm-generic/bitops/instrumented-non-atomic.h:142 [inline]
RIP: 0010:PageHead include/linux/page-flags.h:866 [inline]
RIP: 0010:___free_pages+0x3e/0x120 mm/page_alloc.c:5067
Code: <49> f7 06 40 00 00 00 75 05 45 31 ff eb 0c 66 90 4c 89 f0 4c 39 f0
RSP: 0018:ffff8984551978d0 EFLAGS: 00010246
RAX: 0000777f80000001 RBX: 0000000000000000 RCX: ffffffff918aeb98
RDX: 0000000000000000 RSI: 0000000000000008 RDI: ffffebde00000000
RBP: 0000000000000000 R08: ffffebde00000007 R09: 1ffffd7bc0000000
R10: dffffc0000000000 R11: fffff97bc0000001 R12: dffffc0000000000
R13: ffff8983e19751a8 R14: ffffebde00000000 R15: 1ffffd7bc0000000
FS: 0000000000000000(0000) GS:ffff89ee661d3000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: ffffebde00000000 CR3: 000000793ceaa000 CR4: 0000000000350ef0
DR0: 0000000000000000 DR1: 0000000000000b5f DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Call Trace:
<TASK>
sev_free_vcpu+0x413/0x630 arch/x86/kvm/svm/sev.c:3169
svm_vcpu_free+0x13a/0x2a0 arch/x86/kvm/svm/svm.c:1515
kvm_arch_vcpu_destroy+0x6a/0x1d0 arch/x86/kvm/x86.c:12396
kvm_vcpu_destroy virt/kvm/kvm_main.c:470 [inline]
kvm_destroy_vcpus+0xd1/0x300 virt/kvm/kvm_main.c:490
kvm_arch_destroy_vm+0x636/0x820 arch/x86/kvm/x86.c:12895
kvm_put_kvm+0xb8e/0xfb0 virt/kvm/kvm_main.c:1310
kvm_vm_release+0x48/0x60 virt/kvm/kvm_main.c:1369
__fput+0x3e4/0x9e0 fs/file_table.c:465
task_work_run+0x1a9/0x220 kernel/task_work.c:227
exit_task_work include/linux/task_work.h:40 [inline]
do_exit+0x7f0/0x25b0 kernel/exit.c:953
do_group_exit+0x203/0x2d0 kernel/exit.c:1102
get_signal+0x1357/0x1480 kernel/signal.c:3034
arch_do_signal_or_restart+0x40/0x690 arch/x86/kernel/signal.c:337
exit_to_user_mode_loop kernel/entry/common.c:111 [inline]
exit_to_user_mode_prepare include/linux/entry-common.h:329 [inline]
__syscall_exit_to_user_mode_work kernel/entry/common.c:207 [inline]
syscall_exit_to_user_mode+0x67/0xb0 kernel/entry/common.c:218
do_syscall_64+0x7c/0x150 arch/x86/entry/syscall_64.c:100
entry_SYSCALL_64_after_hwframe+0x76/0x7e
RIP: 0033:0x7f87a898e969
</TASK>
Modules linked in: gq(O)
gsmi: Log Shutdown Reason 0x03
CR2: ffffebde00000000
---[ end trace 0000000000000000 ]---
Deliberately don't check for a NULL VMSA when freeing the vCPU, as crashing
the host is likely desirable due to the VMSA being consumed by hardware.
E.g. if KVM manages to allow VMRUN on the vCPU, hardware may read/write a
bogus VMSA page. Accessing PFN 0 is "fine"-ish now that it's sequestered
away thanks to L1TF, but panicking in this scenario is preferable to
potentially running with corrupted state.
Reported-by: Alexander Potapenko <glider@google.com>
Tested-by: Alexander Potapenko <glider@google.com>
Fixes: 0b020f5af092 ("KVM: SEV: Add support for SEV-ES intra host migration")
Fixes: b56639318bb2 ("KVM: SEV: Add support for SEV intra host migration")
Cc: stable@vger.kernel.org
Cc: James Houghton <jthoughton@google.com>
Cc: Peter Gonda <pgonda@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
arch/x86/kvm/svm/sev.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index a7a7dc507336..93d899454535 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -2032,6 +2032,10 @@ static int sev_check_source_vcpus(struct kvm *dst, struct kvm *src)
struct kvm_vcpu *src_vcpu;
unsigned long i;
+ if (src->created_vcpus != atomic_read(&src->online_vcpus) ||
+ dst->created_vcpus != atomic_read(&dst->online_vcpus))
+ return -EINVAL;
+
if (!sev_es_guest(src))
return 0;
--
2.49.0.1204.g71687c7c1d-goog
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 2/2] KVM: SVM: Initialize vmsa_pa in VMCB to INVALID_PAGE if VMSA page is NULL
2025-06-02 22:44 [PATCH 0/2] KVM: SVM: Fix a NULL VMSA deref with MOVE_ENC_CONTEXT Sean Christopherson
2025-06-02 22:44 ` [PATCH 1/2] KVM: SVM: Reject SEV{-ES} intra host migration if vCPU creation is in-flight Sean Christopherson
@ 2025-06-02 22:44 ` Sean Christopherson
2025-06-04 11:15 ` Liam Merwick
2025-06-04 15:02 ` [PATCH 0/2] KVM: SVM: Fix a NULL VMSA deref with MOVE_ENC_CONTEXT Paolo Bonzini
2025-06-24 19:38 ` Sean Christopherson
3 siblings, 1 reply; 9+ messages in thread
From: Sean Christopherson @ 2025-06-02 22:44 UTC (permalink / raw)
To: Sean Christopherson, Paolo Bonzini
Cc: kvm, linux-kernel, Alexander Potapenko, James Houghton,
Peter Gonda, Tom Lendacky
When creating an SEV-ES vCPU for intra-host migration, set its vmsa_pa to
INVALID_PAGE to harden against doing VMRUN with a bogus VMSA (KVM checks
for a valid VMSA page in pre_sev_run()).
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
arch/x86/kvm/svm/sev.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 93d899454535..5ebb265f2075 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -4471,8 +4471,12 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
* the VMSA will be NULL if this vCPU is the destination for intrahost
* migration, and will be copied later.
*/
- if (svm->sev_es.vmsa && !svm->sev_es.snp_has_guest_vmsa)
- svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa);
+ if (!svm->sev_es.snp_has_guest_vmsa) {
+ if (svm->sev_es.vmsa)
+ svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa);
+ else
+ svm->vmcb->control.vmsa_pa = INVALID_PAGE;
+ }
/* Can't intercept CR register access, HV can't modify CR registers */
svm_clr_intercept(svm, INTERCEPT_CR0_READ);
--
2.49.0.1204.g71687c7c1d-goog
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH 1/2] KVM: SVM: Reject SEV{-ES} intra host migration if vCPU creation is in-flight
2025-06-02 22:44 ` [PATCH 1/2] KVM: SVM: Reject SEV{-ES} intra host migration if vCPU creation is in-flight Sean Christopherson
@ 2025-06-03 15:21 ` James Houghton
2025-06-03 19:00 ` Sean Christopherson
2025-06-04 11:15 ` Liam Merwick
1 sibling, 1 reply; 9+ messages in thread
From: James Houghton @ 2025-06-03 15:21 UTC (permalink / raw)
To: Sean Christopherson
Cc: Paolo Bonzini, kvm, linux-kernel, Alexander Potapenko,
Peter Gonda, Tom Lendacky
On Mon, Jun 2, 2025 at 3:45 PM Sean Christopherson <seanjc@google.com> wrote:
>
> Reject migration of SEV{-ES} state if either the source or destination VM
> is actively creating a vCPU, i.e. if kvm_vm_ioctl_create_vcpu() is in the
> section between incrementing created_vcpus and online_vcpus. The bulk of
> vCPU creation runs _outside_ of kvm->lock to allow creating multiple vCPUs
> in parallel, and so sev_info.es_active can get toggled from false=>true in
> the destination VM after (or during) svm_vcpu_create(), resulting in an
> SEV{-ES} VM effectively having a non-SEV{-ES} vCPU.
>
> The issue manifests most visibly as a crash when trying to free a vCPU's
> NULL VMSA page in an SEV-ES VM, but any number of things can go wrong.
>
> BUG: unable to handle page fault for address: ffffebde00000000
> #PF: supervisor read access in kernel mode
> #PF: error_code(0x0000) - not-present page
> PGD 0 P4D 0
> Oops: Oops: 0000 [#1] SMP KASAN NOPTI
> CPU: 227 UID: 0 PID: 64063 Comm: syz.5.60023 Tainted: G U O 6.15.0-smp-DEV #2 NONE
> Tainted: [U]=USER, [O]=OOT_MODULE
> Hardware name: Google, Inc. Arcadia_IT_80/Arcadia_IT_80, BIOS 12.52.0-0 10/28/2024
> RIP: 0010:constant_test_bit arch/x86/include/asm/bitops.h:206 [inline]
> RIP: 0010:arch_test_bit arch/x86/include/asm/bitops.h:238 [inline]
> RIP: 0010:_test_bit include/asm-generic/bitops/instrumented-non-atomic.h:142 [inline]
> RIP: 0010:PageHead include/linux/page-flags.h:866 [inline]
> RIP: 0010:___free_pages+0x3e/0x120 mm/page_alloc.c:5067
> Code: <49> f7 06 40 00 00 00 75 05 45 31 ff eb 0c 66 90 4c 89 f0 4c 39 f0
> RSP: 0018:ffff8984551978d0 EFLAGS: 00010246
> RAX: 0000777f80000001 RBX: 0000000000000000 RCX: ffffffff918aeb98
> RDX: 0000000000000000 RSI: 0000000000000008 RDI: ffffebde00000000
> RBP: 0000000000000000 R08: ffffebde00000007 R09: 1ffffd7bc0000000
> R10: dffffc0000000000 R11: fffff97bc0000001 R12: dffffc0000000000
> R13: ffff8983e19751a8 R14: ffffebde00000000 R15: 1ffffd7bc0000000
> FS: 0000000000000000(0000) GS:ffff89ee661d3000(0000) knlGS:0000000000000000
> CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> CR2: ffffebde00000000 CR3: 000000793ceaa000 CR4: 0000000000350ef0
> DR0: 0000000000000000 DR1: 0000000000000b5f DR2: 0000000000000000
> DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
> Call Trace:
> <TASK>
> sev_free_vcpu+0x413/0x630 arch/x86/kvm/svm/sev.c:3169
> svm_vcpu_free+0x13a/0x2a0 arch/x86/kvm/svm/svm.c:1515
> kvm_arch_vcpu_destroy+0x6a/0x1d0 arch/x86/kvm/x86.c:12396
> kvm_vcpu_destroy virt/kvm/kvm_main.c:470 [inline]
> kvm_destroy_vcpus+0xd1/0x300 virt/kvm/kvm_main.c:490
> kvm_arch_destroy_vm+0x636/0x820 arch/x86/kvm/x86.c:12895
> kvm_put_kvm+0xb8e/0xfb0 virt/kvm/kvm_main.c:1310
> kvm_vm_release+0x48/0x60 virt/kvm/kvm_main.c:1369
> __fput+0x3e4/0x9e0 fs/file_table.c:465
> task_work_run+0x1a9/0x220 kernel/task_work.c:227
> exit_task_work include/linux/task_work.h:40 [inline]
> do_exit+0x7f0/0x25b0 kernel/exit.c:953
> do_group_exit+0x203/0x2d0 kernel/exit.c:1102
> get_signal+0x1357/0x1480 kernel/signal.c:3034
> arch_do_signal_or_restart+0x40/0x690 arch/x86/kernel/signal.c:337
> exit_to_user_mode_loop kernel/entry/common.c:111 [inline]
> exit_to_user_mode_prepare include/linux/entry-common.h:329 [inline]
> __syscall_exit_to_user_mode_work kernel/entry/common.c:207 [inline]
> syscall_exit_to_user_mode+0x67/0xb0 kernel/entry/common.c:218
> do_syscall_64+0x7c/0x150 arch/x86/entry/syscall_64.c:100
> entry_SYSCALL_64_after_hwframe+0x76/0x7e
> RIP: 0033:0x7f87a898e969
> </TASK>
> Modules linked in: gq(O)
> gsmi: Log Shutdown Reason 0x03
> CR2: ffffebde00000000
> ---[ end trace 0000000000000000 ]---
>
> Deliberately don't check for a NULL VMSA when freeing the vCPU, as crashing
> the host is likely desirable due to the VMSA being consumed by hardware.
> E.g. if KVM manages to allow VMRUN on the vCPU, hardware may read/write a
> bogus VMSA page. Accessing PFN 0 is "fine"-ish now that it's sequestered
> away thanks to L1TF, but panicking in this scenario is preferable to
> potentially running with corrupted state.
>
> Reported-by: Alexander Potapenko <glider@google.com>
> Tested-by: Alexander Potapenko <glider@google.com>
> Fixes: 0b020f5af092 ("KVM: SEV: Add support for SEV-ES intra host migration")
> Fixes: b56639318bb2 ("KVM: SEV: Add support for SEV intra host migration")
> Cc: stable@vger.kernel.org
> Cc: James Houghton <jthoughton@google.com>
> Cc: Peter Gonda <pgonda@google.com>
> Signed-off-by: Sean Christopherson <seanjc@google.com>
Thanks Sean! Free free to add:
Reviewed-by: James Houghton <jthoughton@google.com>
> ---
> arch/x86/kvm/svm/sev.c | 4 ++++
> 1 file changed, 4 insertions(+)
>
> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> index a7a7dc507336..93d899454535 100644
> --- a/arch/x86/kvm/svm/sev.c
> +++ b/arch/x86/kvm/svm/sev.c
> @@ -2032,6 +2032,10 @@ static int sev_check_source_vcpus(struct kvm *dst, struct kvm *src)
> struct kvm_vcpu *src_vcpu;
> unsigned long i;
>
> + if (src->created_vcpus != atomic_read(&src->online_vcpus) ||
> + dst->created_vcpus != atomic_read(&dst->online_vcpus))
> + return -EINVAL;
I think -EBUSY (or perhaps -EAGAIN) might be a more proper return code.
> +
> if (!sev_es_guest(src))
> return 0;
>
> --
> 2.49.0.1204.g71687c7c1d-goog
>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 1/2] KVM: SVM: Reject SEV{-ES} intra host migration if vCPU creation is in-flight
2025-06-03 15:21 ` James Houghton
@ 2025-06-03 19:00 ` Sean Christopherson
0 siblings, 0 replies; 9+ messages in thread
From: Sean Christopherson @ 2025-06-03 19:00 UTC (permalink / raw)
To: James Houghton
Cc: Paolo Bonzini, kvm, linux-kernel, Alexander Potapenko,
Peter Gonda, Tom Lendacky
On Tue, Jun 03, 2025, James Houghton wrote:
> On Mon, Jun 2, 2025 at 3:45 PM Sean Christopherson <seanjc@google.com> wrote:
> > ---
> > arch/x86/kvm/svm/sev.c | 4 ++++
> > 1 file changed, 4 insertions(+)
> >
> > diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> > index a7a7dc507336..93d899454535 100644
> > --- a/arch/x86/kvm/svm/sev.c
> > +++ b/arch/x86/kvm/svm/sev.c
> > @@ -2032,6 +2032,10 @@ static int sev_check_source_vcpus(struct kvm *dst, struct kvm *src)
> > struct kvm_vcpu *src_vcpu;
> > unsigned long i;
> >
> > + if (src->created_vcpus != atomic_read(&src->online_vcpus) ||
> > + dst->created_vcpus != atomic_read(&dst->online_vcpus))
> > + return -EINVAL;
>
> I think -EBUSY (or perhaps -EAGAIN) might be a more proper return code.
Yeah, I was 50/50 on EBUSY vs EINVAL. I think I went with EINVAL mostly out of
spite :-)
I'll change it to EBUSY.
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 1/2] KVM: SVM: Reject SEV{-ES} intra host migration if vCPU creation is in-flight
2025-06-02 22:44 ` [PATCH 1/2] KVM: SVM: Reject SEV{-ES} intra host migration if vCPU creation is in-flight Sean Christopherson
2025-06-03 15:21 ` James Houghton
@ 2025-06-04 11:15 ` Liam Merwick
1 sibling, 0 replies; 9+ messages in thread
From: Liam Merwick @ 2025-06-04 11:15 UTC (permalink / raw)
To: Sean Christopherson, Paolo Bonzini
Cc: kvm, linux-kernel, Alexander Potapenko, James Houghton,
Peter Gonda, Tom Lendacky
On 02/06/2025 23:44, Sean Christopherson wrote:
> Reject migration of SEV{-ES} state if either the source or destination VM
> is actively creating a vCPU, i.e. if kvm_vm_ioctl_create_vcpu() is in the
> section between incrementing created_vcpus and online_vcpus. The bulk of
> vCPU creation runs _outside_ of kvm->lock to allow creating multiple vCPUs
> in parallel, and so sev_info.es_active can get toggled from false=>true in
> the destination VM after (or during) svm_vcpu_create(), resulting in an
> SEV{-ES} VM effectively having a non-SEV{-ES} vCPU.
>
> The issue manifests most visibly as a crash when trying to free a vCPU's
> NULL VMSA page in an SEV-ES VM, but any number of things can go wrong.
>
> BUG: unable to handle page fault for address: ffffebde00000000
> #PF: supervisor read access in kernel mode
> #PF: error_code(0x0000) - not-present page
> PGD 0 P4D 0
> Oops: Oops: 0000 [#1] SMP KASAN NOPTI
> CPU: 227 UID: 0 PID: 64063 Comm: syz.5.60023 Tainted: G U O 6.15.0-smp-DEV #2 NONE
> Tainted: [U]=USER, [O]=OOT_MODULE
> Hardware name: Google, Inc. Arcadia_IT_80/Arcadia_IT_80, BIOS 12.52.0-0 10/28/2024
> RIP: 0010:constant_test_bit arch/x86/include/asm/bitops.h:206 [inline]
> RIP: 0010:arch_test_bit arch/x86/include/asm/bitops.h:238 [inline]
> RIP: 0010:_test_bit include/asm-generic/bitops/instrumented-non-atomic.h:142 [inline]
> RIP: 0010:PageHead include/linux/page-flags.h:866 [inline]
> RIP: 0010:___free_pages+0x3e/0x120 mm/page_alloc.c:5067
> Code: <49> f7 06 40 00 00 00 75 05 45 31 ff eb 0c 66 90 4c 89 f0 4c 39 f0
> RSP: 0018:ffff8984551978d0 EFLAGS: 00010246
> RAX: 0000777f80000001 RBX: 0000000000000000 RCX: ffffffff918aeb98
> RDX: 0000000000000000 RSI: 0000000000000008 RDI: ffffebde00000000
> RBP: 0000000000000000 R08: ffffebde00000007 R09: 1ffffd7bc0000000
> R10: dffffc0000000000 R11: fffff97bc0000001 R12: dffffc0000000000
> R13: ffff8983e19751a8 R14: ffffebde00000000 R15: 1ffffd7bc0000000
> FS: 0000000000000000(0000) GS:ffff89ee661d3000(0000) knlGS:0000000000000000
> CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> CR2: ffffebde00000000 CR3: 000000793ceaa000 CR4: 0000000000350ef0
> DR0: 0000000000000000 DR1: 0000000000000b5f DR2: 0000000000000000
> DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
> Call Trace:
> <TASK>
> sev_free_vcpu+0x413/0x630 arch/x86/kvm/svm/sev.c:3169
> svm_vcpu_free+0x13a/0x2a0 arch/x86/kvm/svm/svm.c:1515
> kvm_arch_vcpu_destroy+0x6a/0x1d0 arch/x86/kvm/x86.c:12396
> kvm_vcpu_destroy virt/kvm/kvm_main.c:470 [inline]
> kvm_destroy_vcpus+0xd1/0x300 virt/kvm/kvm_main.c:490
> kvm_arch_destroy_vm+0x636/0x820 arch/x86/kvm/x86.c:12895
> kvm_put_kvm+0xb8e/0xfb0 virt/kvm/kvm_main.c:1310
> kvm_vm_release+0x48/0x60 virt/kvm/kvm_main.c:1369
> __fput+0x3e4/0x9e0 fs/file_table.c:465
> task_work_run+0x1a9/0x220 kernel/task_work.c:227
> exit_task_work include/linux/task_work.h:40 [inline]
> do_exit+0x7f0/0x25b0 kernel/exit.c:953
> do_group_exit+0x203/0x2d0 kernel/exit.c:1102
> get_signal+0x1357/0x1480 kernel/signal.c:3034
> arch_do_signal_or_restart+0x40/0x690 arch/x86/kernel/signal.c:337
> exit_to_user_mode_loop kernel/entry/common.c:111 [inline]
> exit_to_user_mode_prepare include/linux/entry-common.h:329 [inline]
> __syscall_exit_to_user_mode_work kernel/entry/common.c:207 [inline]
> syscall_exit_to_user_mode+0x67/0xb0 kernel/entry/common.c:218
> do_syscall_64+0x7c/0x150 arch/x86/entry/syscall_64.c:100
> entry_SYSCALL_64_after_hwframe+0x76/0x7e
> RIP: 0033:0x7f87a898e969
> </TASK>
> Modules linked in: gq(O)
> gsmi: Log Shutdown Reason 0x03
> CR2: ffffebde00000000
> ---[ end trace 0000000000000000 ]---
>
> Deliberately don't check for a NULL VMSA when freeing the vCPU, as crashing
> the host is likely desirable due to the VMSA being consumed by hardware.
> E.g. if KVM manages to allow VMRUN on the vCPU, hardware may read/write a
> bogus VMSA page. Accessing PFN 0 is "fine"-ish now that it's sequestered
> away thanks to L1TF, but panicking in this scenario is preferable to
> potentially running with corrupted state.
>
> Reported-by: Alexander Potapenko <glider@google.com>
> Tested-by: Alexander Potapenko <glider@google.com>
> Fixes: 0b020f5af092 ("KVM: SEV: Add support for SEV-ES intra host migration")
> Fixes: b56639318bb2 ("KVM: SEV: Add support for SEV intra host migration")
> Cc: stable@vger.kernel.org
> Cc: James Houghton <jthoughton@google.com>
> Cc: Peter Gonda <pgonda@google.com>
> Signed-off-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Liam Merwick <liam.merwick@oracle.com>
Tested-by: Liam Merwick <liam.merwick@oracle.com>
> ---
> arch/x86/kvm/svm/sev.c | 4 ++++
> 1 file changed, 4 insertions(+)
>
> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> index a7a7dc507336..93d899454535 100644
> --- a/arch/x86/kvm/svm/sev.c
> +++ b/arch/x86/kvm/svm/sev.c
> @@ -2032,6 +2032,10 @@ static int sev_check_source_vcpus(struct kvm *dst, struct kvm *src)
> struct kvm_vcpu *src_vcpu;
> unsigned long i;
>
> + if (src->created_vcpus != atomic_read(&src->online_vcpus) ||
> + dst->created_vcpus != atomic_read(&dst->online_vcpus))
> + return -EINVAL;
> +
> if (!sev_es_guest(src))
> return 0;
>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 2/2] KVM: SVM: Initialize vmsa_pa in VMCB to INVALID_PAGE if VMSA page is NULL
2025-06-02 22:44 ` [PATCH 2/2] KVM: SVM: Initialize vmsa_pa in VMCB to INVALID_PAGE if VMSA page is NULL Sean Christopherson
@ 2025-06-04 11:15 ` Liam Merwick
0 siblings, 0 replies; 9+ messages in thread
From: Liam Merwick @ 2025-06-04 11:15 UTC (permalink / raw)
To: Sean Christopherson, Paolo Bonzini
Cc: kvm, linux-kernel, Alexander Potapenko, James Houghton,
Peter Gonda, Tom Lendacky
On 02/06/2025 23:44, Sean Christopherson wrote:
> When creating an SEV-ES vCPU for intra-host migration, set its vmsa_pa to
> INVALID_PAGE to harden against doing VMRUN with a bogus VMSA (KVM checks
> for a valid VMSA page in pre_sev_run()).
>
> Cc: Tom Lendacky <thomas.lendacky@amd.com>
> Signed-off-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Liam Merwick <liam.merwick@oracle.com>
Tested-by: Liam Merwick <liam.merwick@oracle.com>
> ---
> arch/x86/kvm/svm/sev.c | 8 ++++++--
> 1 file changed, 6 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> index 93d899454535..5ebb265f2075 100644
> --- a/arch/x86/kvm/svm/sev.c
> +++ b/arch/x86/kvm/svm/sev.c
> @@ -4471,8 +4471,12 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
> * the VMSA will be NULL if this vCPU is the destination for intrahost
> * migration, and will be copied later.
> */
> - if (svm->sev_es.vmsa && !svm->sev_es.snp_has_guest_vmsa)
> - svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa);
> + if (!svm->sev_es.snp_has_guest_vmsa) {
> + if (svm->sev_es.vmsa)
> + svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa);
> + else
> + svm->vmcb->control.vmsa_pa = INVALID_PAGE;
> + }
>
> /* Can't intercept CR register access, HV can't modify CR registers */
> svm_clr_intercept(svm, INTERCEPT_CR0_READ);
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 0/2] KVM: SVM: Fix a NULL VMSA deref with MOVE_ENC_CONTEXT
2025-06-02 22:44 [PATCH 0/2] KVM: SVM: Fix a NULL VMSA deref with MOVE_ENC_CONTEXT Sean Christopherson
2025-06-02 22:44 ` [PATCH 1/2] KVM: SVM: Reject SEV{-ES} intra host migration if vCPU creation is in-flight Sean Christopherson
2025-06-02 22:44 ` [PATCH 2/2] KVM: SVM: Initialize vmsa_pa in VMCB to INVALID_PAGE if VMSA page is NULL Sean Christopherson
@ 2025-06-04 15:02 ` Paolo Bonzini
2025-06-24 19:38 ` Sean Christopherson
3 siblings, 0 replies; 9+ messages in thread
From: Paolo Bonzini @ 2025-06-04 15:02 UTC (permalink / raw)
To: Sean Christopherson
Cc: kvm, linux-kernel, Alexander Potapenko, James Houghton,
Peter Gonda, Tom Lendacky
> Fix a NULL VMSA deref bug (which is probably the tip of the iceberg with
> respect to what all can go wrong) due to a race between KVM_CREATE_VCPU and
> KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM, where a non-SEV-ES vCPU can be created in
> an SEV-ES VM.
>
> Found by running syzkaller on a bare metal SEV-ES host. C repro below.
Queued, thanks (with EBUSY instead of EINVAL).
Paolo
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 0/2] KVM: SVM: Fix a NULL VMSA deref with MOVE_ENC_CONTEXT
2025-06-02 22:44 [PATCH 0/2] KVM: SVM: Fix a NULL VMSA deref with MOVE_ENC_CONTEXT Sean Christopherson
` (2 preceding siblings ...)
2025-06-04 15:02 ` [PATCH 0/2] KVM: SVM: Fix a NULL VMSA deref with MOVE_ENC_CONTEXT Paolo Bonzini
@ 2025-06-24 19:38 ` Sean Christopherson
3 siblings, 0 replies; 9+ messages in thread
From: Sean Christopherson @ 2025-06-24 19:38 UTC (permalink / raw)
To: Sean Christopherson, Paolo Bonzini
Cc: kvm, linux-kernel, Alexander Potapenko, James Houghton,
Peter Gonda, Tom Lendacky
On Mon, 02 Jun 2025 15:44:57 -0700, Sean Christopherson wrote:
> Fix a NULL VMSA deref bug (which is probably the tip of the iceberg with
> respect to what all can go wrong) due to a race between KVM_CREATE_VCPU and
> KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM, where a non-SEV-ES vCPU can be created in
> an SEV-ES VM.
>
> Found by running syzkaller on a bare metal SEV-ES host. C repro below.
>
> [...]
Applied to kvm-x86 fixes. Paolo's "Queued, thanks!" seems to have been
premature (though Paolo's mail saved me; I completely forgot about swapping
EINVAL to EBUSY).
[1/2] KVM: SVM: Reject SEV{-ES} intra host migration if vCPU creation is in-flight
https://github.com/kvm-x86/linux/commit/ecf371f8b02d
[2/2] KVM: SVM: Initialize vmsa_pa in VMCB to INVALID_PAGE if VMSA page is NULL
https://github.com/kvm-x86/linux/commit/48f15f62418
--
https://github.com/kvm-x86/kvm-unit-tests/tree/next
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2025-06-24 19:40 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-06-02 22:44 [PATCH 0/2] KVM: SVM: Fix a NULL VMSA deref with MOVE_ENC_CONTEXT Sean Christopherson
2025-06-02 22:44 ` [PATCH 1/2] KVM: SVM: Reject SEV{-ES} intra host migration if vCPU creation is in-flight Sean Christopherson
2025-06-03 15:21 ` James Houghton
2025-06-03 19:00 ` Sean Christopherson
2025-06-04 11:15 ` Liam Merwick
2025-06-02 22:44 ` [PATCH 2/2] KVM: SVM: Initialize vmsa_pa in VMCB to INVALID_PAGE if VMSA page is NULL Sean Christopherson
2025-06-04 11:15 ` Liam Merwick
2025-06-04 15:02 ` [PATCH 0/2] KVM: SVM: Fix a NULL VMSA deref with MOVE_ENC_CONTEXT Paolo Bonzini
2025-06-24 19:38 ` Sean Christopherson
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).