From: Sean Christopherson <seanjc@google.com>
To: Vitaly Kuznetsov <vkuznets@redhat.com>,
	Paolo Bonzini <pbonzini@redhat.com>,
	kvm@vger.kernel.org,  linux-kernel@vger.kernel.org
Subject: Re: [PATCH 0/5] KVM: x86/hyperv: Fix racy usage of vcpu->arch.hyperv
Date: Thu, 23 Apr 2026 07:40:29 -0700
Message-ID: <aeovXe9YN_AxVYeX@google.com>
In-Reply-To: <20260423140833.439512-1-seanjc@google.com>

On Thu, Apr 23, 2026, Sean Christopherson wrote:
> I'll reply with the C reproducer (which may or may not repro on an upstream
> kernel; I was never able to reproduce the splat myself, and relied on syzbot
> to test for me).

Aaaand I almost forgot.

// autogenerated by syzkaller (https://github.com/google/syzkaller)
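//
// What follows is syzkaller's generic x86 KVM guest harness ("SyzOS") plus
// the generated program that drives it; per the note above, it may or may not
// reproduce the vcpu->arch.hyperv race on an upstream kernel.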

#define _GNU_SOURCE

#include <dirent.h>
#include <endian.h>
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <signal.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mount.h>
#include <sys/prctl.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>

#include <linux/futex.h>
#include <linux/kvm.h>

static unsigned long long procid;

static void sleep_ms(uint64_t ms)
{
  usleep(ms * 1000);
}

static uint64_t current_time_ms(void)
{
  struct timespec ts;
  if (clock_gettime(CLOCK_MONOTONIC, &ts))
    exit(1);
  return (uint64_t)ts.tv_sec * 1000 + (uint64_t)ts.tv_nsec / 1000000;
}

static void thread_start(void* (*fn)(void*), void* arg)
{
  pthread_t th;
  pthread_attr_t attr;
  pthread_attr_init(&attr);
  pthread_attr_setstacksize(&attr, 128 << 10);
  int i = 0;
  for (; i < 100; i++) {
    if (pthread_create(&th, &attr, fn, arg) == 0) {
      pthread_attr_destroy(&attr);
      return;
    }
    if (errno == EAGAIN) {
      usleep(50);
      continue;
    }
    break;
  }
  exit(1);
}

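// Minimal futex-based event primitives used by the harness to synchronize
// its threads and processes.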
typedef struct {
  int state;
} event_t;

static void event_init(event_t* ev)
{
  ev->state = 0;
}

static void event_reset(event_t* ev)
{
  ev->state = 0;
}

static void event_set(event_t* ev)
{
  if (ev->state)
    exit(1);
  __atomic_store_n(&ev->state, 1, __ATOMIC_RELEASE);
  syscall(SYS_futex, &ev->state, FUTEX_WAKE | FUTEX_PRIVATE_FLAG, 1000000);
}

static void event_wait(event_t* ev)
{
  while (!__atomic_load_n(&ev->state, __ATOMIC_ACQUIRE))
    syscall(SYS_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, 0);
}

static int event_isset(event_t* ev)
{
  return __atomic_load_n(&ev->state, __ATOMIC_ACQUIRE);
}

static int event_timedwait(event_t* ev, uint64_t timeout)
{
  uint64_t start = current_time_ms();
  uint64_t now = start;
  for (;;) {
    uint64_t remain = timeout - (now - start);
    struct timespec ts;
    ts.tv_sec = remain / 1000;
    ts.tv_nsec = (remain % 1000) * 1000 * 1000;
    syscall(SYS_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, &ts);
    if (__atomic_load_n(&ev->state, __ATOMIC_ACQUIRE))
      return 1;
    now = current_time_ms();
    if (now - start > timeout)
      return 0;
  }
}

static bool write_file(const char* file, const char* what, ...)
{
  char buf[1024];
  va_list args;
  va_start(args, what);
  vsnprintf(buf, sizeof(buf), what, args);
  va_end(args);
  buf[sizeof(buf) - 1] = 0;
  int len = strlen(buf);
  int fd = open(file, O_WRONLY | O_CLOEXEC);
  if (fd == -1)
    return false;
  if (write(fd, buf, len) != len) {
    int err = errno;
    close(fd);
    errno = err;
    return false;
  }
  close(fd);
  return true;
}

#define noinline __attribute__((noinline))
#define always_inline __attribute__((always_inline)) inline
#define __no_stack_protector
#define __addrspace_guest
#define __optnone
#define GUEST_CODE                                                             \
  __attribute__((section("guest"))) __no_stack_protector __addrspace_guest
extern char *__start_guest, *__stop_guest;
struct api_call_header {
  uint64_t call;
  uint64_t size;
};

struct api_call_1 {
  struct api_call_header header;
  uint64_t arg;
};

struct api_call_2 {
  struct api_call_header header;
  uint64_t args[2];
};

struct api_call_3 {
  struct api_call_header header;
  uint64_t args[3];
};

struct api_call_5 {
  struct api_call_header header;
  uint64_t args[5];
};

#define X86_ADDR_TEXT 0x0000
#define X86_ADDR_PD_IOAPIC 0x0000
#define X86_ADDR_GDT 0x1000
#define X86_ADDR_LDT 0x1800
#define X86_ADDR_PML4 0x2000
#define X86_ADDR_PDP 0x3000
#define X86_ADDR_PD 0x4000
#define X86_ADDR_STACK0 0x0f80
#define X86_ADDR_VAR_HLT 0x2800
#define X86_ADDR_VAR_SYSRET 0x2808
#define X86_ADDR_VAR_SYSEXIT 0x2810
#define X86_ADDR_VAR_IDT 0x3800
#define X86_ADDR_VAR_TSS64 0x3a00
#define X86_ADDR_VAR_TSS64_CPL3 0x3c00
#define X86_ADDR_VAR_TSS16 0x3d00
#define X86_ADDR_VAR_TSS16_2 0x3e00
#define X86_ADDR_VAR_TSS16_CPL3 0x3f00
#define X86_ADDR_VAR_TSS32 0x4800
#define X86_ADDR_VAR_TSS32_2 0x4a00
#define X86_ADDR_VAR_TSS32_CPL3 0x4c00
#define X86_ADDR_VAR_TSS32_VM86 0x4e00
#define X86_ADDR_VAR_VMXON_PTR 0x5f00
#define X86_ADDR_VAR_VMCS_PTR 0x5f08
#define X86_ADDR_VAR_VMEXIT_PTR 0x5f10
#define X86_ADDR_VAR_VMWRITE_FLD 0x5f18
#define X86_ADDR_VAR_VMWRITE_VAL 0x5f20
#define X86_ADDR_VAR_VMXON 0x6000
#define X86_ADDR_VAR_VMCS 0x7000
#define X86_ADDR_VAR_VMEXIT_CODE 0x9000
#define X86_ADDR_VAR_USER_CODE 0x9100
#define X86_ADDR_VAR_USER_CODE2 0x9120
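// Guest-physical memory layout of the SyzOS guest image: boot structures
// (GDT, page tables, IDT/TSS, boot args), fixed exit/uexit pages, user code,
// and per-vCPU regions holding the VMCS/VMCB, code, stack and page tables of
// nested (L2) VMs.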
#define X86_SYZOS_ADDR_ZERO 0x0
#define X86_SYZOS_ADDR_GDT 0x1000
#define X86_SYZOS_ADDR_PML4 0x2000
#define X86_SYZOS_ADDR_PDP 0x3000
#define X86_SYZOS_ADDR_VAR_IDT 0x25000
#define X86_SYZOS_ADDR_VAR_TSS 0x26000
#define X86_SYZOS_ADDR_BOOT_ARGS 0x2F000

#define X86_SYZOS_ADDR_SMRAM 0x30000
#define X86_SYZOS_ADDR_EXIT 0x40000
#define X86_SYZOS_ADDR_UEXIT (X86_SYZOS_ADDR_EXIT + 256)
#define X86_SYZOS_ADDR_DIRTY_PAGES 0x41000
#define X86_SYZOS_ADDR_USER_CODE 0x50000
#define SYZOS_ADDR_EXECUTOR_CODE 0x54000
#define X86_SYZOS_ADDR_SCRATCH_CODE 0x58000
#define X86_SYZOS_ADDR_STACK_BOTTOM 0x60000
#define X86_SYZOS_ADDR_STACK0 0x60f80
#define X86_SYZOS_PER_VCPU_REGIONS_BASE 0x400000
#define X86_SYZOS_L1_VCPU_REGION_SIZE 0x40000
#define X86_SYZOS_L1_VCPU_OFFSET_VM_ARCH_SPECIFIC 0x0000
#define X86_SYZOS_L1_VCPU_OFFSET_L2_VMS_AREA 0x1000
#define X86_SYZOS_ADDR_GLOBALS 0x17F000
#define X86_SYZOS_ADDR_PT_POOL 0x180000
#define X86_SYZOS_PT_POOL_SIZE 64
#define X86_SYZOS_L2_VM_REGION_SIZE 0x8000
#define X86_SYZOS_L2_VM_OFFSET_VMCS_VMCB 0x0000
#define X86_SYZOS_L2_VM_OFFSET_VM_STACK 0x1000
#define X86_SYZOS_L2_VM_OFFSET_VM_CODE 0x2000
#define X86_SYZOS_L2_VM_OFFSET_VM_PGTABLE 0x3000
#define X86_SYZOS_L2_VM_OFFSET_MSR_BITMAP 0x7000
#define X86_SYZOS_ADDR_UNUSED 0x1000000
#define X86_SYZOS_ADDR_IOAPIC 0xfec00000

#define X86_SYZOS_ADDR_VMCS_VMCB(cpu, vm)                                      \
  (X86_SYZOS_PER_VCPU_REGIONS_BASE + (cpu) * X86_SYZOS_L1_VCPU_REGION_SIZE +   \
   X86_SYZOS_L1_VCPU_OFFSET_L2_VMS_AREA + (vm) * X86_SYZOS_L2_VM_REGION_SIZE + \
   X86_SYZOS_L2_VM_OFFSET_VMCS_VMCB)

#define X86_SYZOS_ADDR_VM_CODE(cpu, vm)                                        \
  (X86_SYZOS_PER_VCPU_REGIONS_BASE + (cpu) * X86_SYZOS_L1_VCPU_REGION_SIZE +   \
   X86_SYZOS_L1_VCPU_OFFSET_L2_VMS_AREA + (vm) * X86_SYZOS_L2_VM_REGION_SIZE + \
   X86_SYZOS_L2_VM_OFFSET_VM_CODE)

#define X86_SYZOS_ADDR_VM_STACK(cpu, vm)                                       \
  (X86_SYZOS_PER_VCPU_REGIONS_BASE + (cpu) * X86_SYZOS_L1_VCPU_REGION_SIZE +   \
   X86_SYZOS_L1_VCPU_OFFSET_L2_VMS_AREA + (vm) * X86_SYZOS_L2_VM_REGION_SIZE + \
   X86_SYZOS_L2_VM_OFFSET_VM_STACK)

#define X86_SYZOS_ADDR_VM_PGTABLE(cpu, vm)                                     \
  (X86_SYZOS_PER_VCPU_REGIONS_BASE + (cpu) * X86_SYZOS_L1_VCPU_REGION_SIZE +   \
   X86_SYZOS_L1_VCPU_OFFSET_L2_VMS_AREA + (vm) * X86_SYZOS_L2_VM_REGION_SIZE + \
   X86_SYZOS_L2_VM_OFFSET_VM_PGTABLE)

#define X86_SYZOS_ADDR_MSR_BITMAP(cpu, vm)                                     \
  (X86_SYZOS_PER_VCPU_REGIONS_BASE + (cpu) * X86_SYZOS_L1_VCPU_REGION_SIZE +   \
   X86_SYZOS_L1_VCPU_OFFSET_L2_VMS_AREA + (vm) * X86_SYZOS_L2_VM_REGION_SIZE + \
   X86_SYZOS_L2_VM_OFFSET_MSR_BITMAP)

#define X86_SYZOS_ADDR_VM_ARCH_SPECIFIC(cpu)                                   \
  (X86_SYZOS_PER_VCPU_REGIONS_BASE + (cpu) * X86_SYZOS_L1_VCPU_REGION_SIZE +   \
   X86_SYZOS_L1_VCPU_OFFSET_VM_ARCH_SPECIFIC)
#define X86_SYZOS_SEL_CODE 0x8
#define X86_SYZOS_SEL_DATA 0x10
#define X86_SYZOS_SEL_TSS64 0x18

#define X86_CR0_PE 1ULL
#define X86_CR0_MP (1ULL << 1)
#define X86_CR0_EM (1ULL << 2)
#define X86_CR0_TS (1ULL << 3)
#define X86_CR0_ET (1ULL << 4)
#define X86_CR0_NE (1ULL << 5)
#define X86_CR0_WP (1ULL << 16)
#define X86_CR0_AM (1ULL << 18)
#define X86_CR0_NW (1ULL << 29)
#define X86_CR0_CD (1ULL << 30)
#define X86_CR0_PG (1ULL << 31)

#define X86_CR4_VME 1ULL
#define X86_CR4_PVI (1ULL << 1)
#define X86_CR4_TSD (1ULL << 2)
#define X86_CR4_DE (1ULL << 3)
#define X86_CR4_PSE (1ULL << 4)
#define X86_CR4_PAE (1ULL << 5)
#define X86_CR4_MCE (1ULL << 6)
#define X86_CR4_PGE (1ULL << 7)
#define X86_CR4_PCE (1ULL << 8)
#define X86_CR4_OSFXSR (1ULL << 9)
#define X86_CR4_OSXMMEXCPT (1ULL << 10)
#define X86_CR4_UMIP (1ULL << 11)
#define X86_CR4_VMXE (1ULL << 13)
#define X86_CR4_SMXE (1ULL << 14)
#define X86_CR4_FSGSBASE (1ULL << 16)
#define X86_CR4_PCIDE (1ULL << 17)
#define X86_CR4_OSXSAVE (1ULL << 18)
#define X86_CR4_SMEP (1ULL << 20)
#define X86_CR4_SMAP (1ULL << 21)
#define X86_CR4_PKE (1ULL << 22)

#define X86_EFER_SCE 1ULL
#define X86_EFER_LME (1ULL << 8)
#define X86_EFER_LMA (1ULL << 10)
#define X86_EFER_NXE (1ULL << 11)
#define X86_EFER_SVME (1ULL << 12)
#define X86_EFER_LMSLE (1ULL << 13)
#define X86_EFER_FFXSR (1ULL << 14)
#define X86_EFER_TCE (1ULL << 15)
#define X86_PDE32_PRESENT 1UL
#define X86_PDE32_RW (1UL << 1)
#define X86_PDE32_USER (1UL << 2)
#define X86_PDE32_PS (1UL << 7)
#define X86_PDE64_PRESENT 1
#define X86_PDE64_RW (1ULL << 1)
#define X86_PDE64_USER (1ULL << 2)
#define X86_PDE64_ACCESSED (1ULL << 5)
#define X86_PDE64_DIRTY (1ULL << 6)
#define X86_PDE64_PS (1ULL << 7)
#define X86_PDE64_G (1ULL << 8)
#define EPT_MEMTYPE_WB (6ULL << 3)
#define EPT_ACCESSED (1ULL << 8)
#define EPT_DIRTY (1ULL << 9)

#define X86_SEL_LDT (1 << 3)
#define X86_SEL_CS16 (2 << 3)
#define X86_SEL_DS16 (3 << 3)
#define X86_SEL_CS16_CPL3 ((4 << 3) + 3)
#define X86_SEL_DS16_CPL3 ((5 << 3) + 3)
#define X86_SEL_CS32 (6 << 3)
#define X86_SEL_DS32 (7 << 3)
#define X86_SEL_CS32_CPL3 ((8 << 3) + 3)
#define X86_SEL_DS32_CPL3 ((9 << 3) + 3)
#define X86_SEL_CS64 (10 << 3)
#define X86_SEL_DS64 (11 << 3)
#define X86_SEL_CS64_CPL3 ((12 << 3) + 3)
#define X86_SEL_DS64_CPL3 ((13 << 3) + 3)
#define X86_SEL_CGATE16 (14 << 3)
#define X86_SEL_TGATE16 (15 << 3)
#define X86_SEL_CGATE32 (16 << 3)
#define X86_SEL_TGATE32 (17 << 3)
#define X86_SEL_CGATE64 (18 << 3)
#define X86_SEL_CGATE64_HI (19 << 3)
#define X86_SEL_TSS16 (20 << 3)
#define X86_SEL_TSS16_2 (21 << 3)
#define X86_SEL_TSS16_CPL3 ((22 << 3) + 3)
#define X86_SEL_TSS32 (23 << 3)
#define X86_SEL_TSS32_2 (24 << 3)
#define X86_SEL_TSS32_CPL3 ((25 << 3) + 3)
#define X86_SEL_TSS32_VM86 (26 << 3)
#define X86_SEL_TSS64 (27 << 3)
#define X86_SEL_TSS64_HI (28 << 3)
#define X86_SEL_TSS64_CPL3 ((29 << 3) + 3)
#define X86_SEL_TSS64_CPL3_HI (30 << 3)
#define X86_MSR_IA32_FEATURE_CONTROL 0x3a
#define X86_MSR_IA32_VMX_BASIC 0x480
#define X86_MSR_IA32_SMBASE 0x9e
#define X86_MSR_IA32_SYSENTER_CS 0x174
#define X86_MSR_IA32_SYSENTER_ESP 0x175
#define X86_MSR_IA32_SYSENTER_EIP 0x176
#define X86_MSR_IA32_CR_PAT 0x277
#define X86_MSR_CORE_PERF_GLOBAL_CTRL 0x38f
#define X86_MSR_IA32_VMX_TRUE_PINBASED_CTLS 0x48d
#define X86_MSR_IA32_VMX_TRUE_PROCBASED_CTLS 0x48e
#define X86_MSR_IA32_VMX_TRUE_EXIT_CTLS 0x48f
#define X86_MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x490
#define X86_MSR_IA32_EFER 0xc0000080
#define X86_MSR_IA32_STAR 0xC0000081
#define X86_MSR_IA32_LSTAR 0xC0000082
#define X86_MSR_FS_BASE 0xc0000100
#define X86_MSR_GS_BASE 0xc0000101
#define X86_MSR_VM_HSAVE_PA 0xc0010117
#define X86_MSR_IA32_VMX_PROCBASED_CTLS2 0x48B
#define RFLAGS_1_BIT (1ULL << 1)
#define CPU_BASED_HLT_EXITING (1U << 7)
#define CPU_BASED_RDTSC_EXITING (1U << 12)
#define AR_TSS_AVAILABLE 0x0089
#define SVM_ATTR_LDTR_UNUSABLE 0x0000
#define VMX_AR_TSS_BUSY 0x008b
#define VMX_AR_TSS_AVAILABLE 0x0089
#define VMX_AR_LDTR_UNUSABLE 0x10000
#define VM_ENTRY_IA32E_MODE (1U << 9)
#define SECONDARY_EXEC_ENABLE_EPT (1U << 1)
#define SECONDARY_EXEC_ENABLE_RDTSCP (1U << 3)
#define VM_EXIT_HOST_ADDR_SPACE_SIZE (1U << 9)
#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS (1U << 31)

#define VMX_ACCESS_RIGHTS_P (1 << 7)
#define VMX_ACCESS_RIGHTS_S (1 << 4)
#define VMX_ACCESS_RIGHTS_TYPE_A (1 << 0)
#define VMX_ACCESS_RIGHTS_TYPE_RW (1 << 1)
#define VMX_ACCESS_RIGHTS_TYPE_E (1 << 3)
#define VMX_ACCESS_RIGHTS_G (1 << 15)
#define VMX_ACCESS_RIGHTS_DB (1 << 14)
#define VMX_ACCESS_RIGHTS_L (1 << 13)
#define VMX_AR_64BIT_DATA_STACK                                                \
  (VMX_ACCESS_RIGHTS_P | VMX_ACCESS_RIGHTS_S | VMX_ACCESS_RIGHTS_TYPE_RW |     \
   VMX_ACCESS_RIGHTS_TYPE_A | VMX_ACCESS_RIGHTS_G | VMX_ACCESS_RIGHTS_DB)
#define VMX_AR_64BIT_CODE                                                      \
  (VMX_ACCESS_RIGHTS_P | VMX_ACCESS_RIGHTS_S | VMX_ACCESS_RIGHTS_TYPE_E |      \
   VMX_ACCESS_RIGHTS_TYPE_RW | VMX_ACCESS_RIGHTS_TYPE_A |                      \
   VMX_ACCESS_RIGHTS_G | VMX_ACCESS_RIGHTS_L)
#define VMCS_VIRTUAL_PROCESSOR_ID 0x00000000
#define VMCS_POSTED_INTR_NV 0x00000002
#define VMCS_MSR_BITMAP 0x00002004
#define VMCS_VMREAD_BITMAP 0x00002006
#define VMCS_VMWRITE_BITMAP 0x00002008
#define VMCS_EPT_POINTER 0x0000201a
#define VMCS_LINK_POINTER 0x00002800
#define VMCS_PIN_BASED_VM_EXEC_CONTROL 0x00004000
#define VMCS_CPU_BASED_VM_EXEC_CONTROL 0x00004002
#define VMCS_EXCEPTION_BITMAP 0x00004004
#define VMCS_PAGE_FAULT_ERROR_CODE_MASK 0x00004006
#define VMCS_PAGE_FAULT_ERROR_CODE_MATCH 0x00004008
#define VMCS_CR3_TARGET_COUNT 0x0000400a
#define VMCS_VM_EXIT_CONTROLS 0x0000400c
#define VMCS_VM_EXIT_MSR_STORE_COUNT 0x0000400e
#define VMCS_VM_EXIT_MSR_LOAD_COUNT 0x00004010
#define VMCS_VM_ENTRY_CONTROLS 0x00004012
#define VMCS_VM_ENTRY_MSR_LOAD_COUNT 0x00004014
#define VMCS_VM_ENTRY_INTR_INFO_FIELD 0x00004016
#define VMCS_TPR_THRESHOLD 0x0000401c
#define VMCS_SECONDARY_VM_EXEC_CONTROL 0x0000401e
#define VMCS_VM_INSTRUCTION_ERROR 0x00004400
#define VMCS_VM_EXIT_REASON 0x00004402
#define VMCS_VMX_PREEMPTION_TIMER_VALUE 0x0000482e
#define VMCS_CR0_GUEST_HOST_MASK 0x00006000
#define VMCS_CR4_GUEST_HOST_MASK 0x00006002
#define VMCS_CR0_READ_SHADOW 0x00006004
#define VMCS_CR4_READ_SHADOW 0x00006006
#define VMCS_HOST_ES_SELECTOR 0x00000c00
#define VMCS_HOST_CS_SELECTOR 0x00000c02
#define VMCS_HOST_SS_SELECTOR 0x00000c04
#define VMCS_HOST_DS_SELECTOR 0x00000c06
#define VMCS_HOST_FS_SELECTOR 0x00000c08
#define VMCS_HOST_GS_SELECTOR 0x00000c0a
#define VMCS_HOST_TR_SELECTOR 0x00000c0c
#define VMCS_HOST_IA32_PAT 0x00002c00
#define VMCS_HOST_IA32_EFER 0x00002c02
#define VMCS_HOST_IA32_PERF_GLOBAL_CTRL 0x00002c04
#define VMCS_HOST_IA32_SYSENTER_CS 0x00004c00
#define VMCS_HOST_CR0 0x00006c00
#define VMCS_HOST_CR3 0x00006c02
#define VMCS_HOST_CR4 0x00006c04
#define VMCS_HOST_FS_BASE 0x00006c06
#define VMCS_HOST_GS_BASE 0x00006c08
#define VMCS_HOST_TR_BASE 0x00006c0a
#define VMCS_HOST_GDTR_BASE 0x00006c0c
#define VMCS_HOST_IDTR_BASE 0x00006c0e
#define VMCS_HOST_IA32_SYSENTER_ESP 0x00006c10
#define VMCS_HOST_IA32_SYSENTER_EIP 0x00006c12
#define VMCS_HOST_RSP 0x00006c14
#define VMCS_HOST_RIP 0x00006c16
#define VMCS_GUEST_INTR_STATUS 0x00000810
#define VMCS_GUEST_PML_INDEX 0x00000812
#define VMCS_GUEST_PHYSICAL_ADDRESS 0x00002400
#define VMCS_GUEST_IA32_DEBUGCTL 0x00002802
#define VMCS_GUEST_IA32_PAT 0x00002804
#define VMCS_GUEST_IA32_EFER 0x00002806
#define VMCS_GUEST_IA32_PERF_GLOBAL_CTRL 0x00002808
#define VMCS_GUEST_ES_SELECTOR 0x00000800
#define VMCS_GUEST_CS_SELECTOR 0x00000802
#define VMCS_GUEST_SS_SELECTOR 0x00000804
#define VMCS_GUEST_DS_SELECTOR 0x00000806
#define VMCS_GUEST_FS_SELECTOR 0x00000808
#define VMCS_GUEST_GS_SELECTOR 0x0000080a
#define VMCS_GUEST_LDTR_SELECTOR 0x0000080c
#define VMCS_GUEST_TR_SELECTOR 0x0000080e
#define VMCS_GUEST_ES_LIMIT 0x00004800
#define VMCS_GUEST_CS_LIMIT 0x00004802
#define VMCS_GUEST_SS_LIMIT 0x00004804
#define VMCS_GUEST_DS_LIMIT 0x00004806
#define VMCS_GUEST_FS_LIMIT 0x00004808
#define VMCS_GUEST_GS_LIMIT 0x0000480a
#define VMCS_GUEST_LDTR_LIMIT 0x0000480c
#define VMCS_GUEST_TR_LIMIT 0x0000480e
#define VMCS_GUEST_GDTR_LIMIT 0x00004810
#define VMCS_GUEST_IDTR_LIMIT 0x00004812
#define VMCS_GUEST_ES_ACCESS_RIGHTS 0x00004814
#define VMCS_GUEST_CS_ACCESS_RIGHTS 0x00004816
#define VMCS_GUEST_SS_ACCESS_RIGHTS 0x00004818
#define VMCS_GUEST_DS_ACCESS_RIGHTS 0x0000481a
#define VMCS_GUEST_FS_ACCESS_RIGHTS 0x0000481c
#define VMCS_GUEST_GS_ACCESS_RIGHTS 0x0000481e
#define VMCS_GUEST_LDTR_ACCESS_RIGHTS 0x00004820
#define VMCS_GUEST_TR_ACCESS_RIGHTS 0x00004822
#define VMCS_GUEST_ACTIVITY_STATE 0x00004824
#define VMCS_GUEST_INTERRUPTIBILITY_INFO 0x00004826
#define VMCS_GUEST_SYSENTER_CS 0x0000482a
#define VMCS_GUEST_CR0 0x00006800
#define VMCS_GUEST_CR3 0x00006802
#define VMCS_GUEST_CR4 0x00006804
#define VMCS_GUEST_ES_BASE 0x00006806
#define VMCS_GUEST_CS_BASE 0x00006808
#define VMCS_GUEST_SS_BASE 0x0000680a
#define VMCS_GUEST_DS_BASE 0x0000680c
#define VMCS_GUEST_FS_BASE 0x0000680e
#define VMCS_GUEST_GS_BASE 0x00006810
#define VMCS_GUEST_LDTR_BASE 0x00006812
#define VMCS_GUEST_TR_BASE 0x00006814
#define VMCS_GUEST_GDTR_BASE 0x00006816
#define VMCS_GUEST_IDTR_BASE 0x00006818
#define VMCS_GUEST_DR7 0x0000681a
#define VMCS_GUEST_RSP 0x0000681c
#define VMCS_GUEST_RIP 0x0000681e
#define VMCS_GUEST_RFLAGS 0x00006820
#define VMCS_GUEST_PENDING_DBG_EXCEPTIONS 0x00006822
#define VMCS_GUEST_SYSENTER_ESP 0x00006824
#define VMCS_GUEST_SYSENTER_EIP 0x00006826
#define VMCB_CTRL_INTERCEPT_VEC3 0x0c
#define VMCB_CTRL_INTERCEPT_VEC3_ALL (0xffffffff)
#define VMCB_CTRL_INTERCEPT_VEC4 0x10
#define VMCB_CTRL_INTERCEPT_VEC4_ALL (0x3ff)

#define VMCB_CTRL_ASID 0x058
#define VMCB_EXIT_CODE 0x070
#define VMCB_EXITINFO2 0x080
#define VMCB_CTRL_NP_ENABLE 0x090
#define VMCB_CTRL_NPT_ENABLE_BIT 0

#define VMCB_CTRL_N_CR3 0x0b0
#define VMCB_GUEST_ES_SEL 0x400
#define VMCB_GUEST_ES_ATTR 0x402
#define VMCB_GUEST_ES_LIM 0x404
#define VMCB_GUEST_ES_BASE 0x408
#define VMCB_GUEST_CS_SEL 0x410
#define VMCB_GUEST_CS_ATTR 0x412
#define VMCB_GUEST_CS_LIM 0x414
#define VMCB_GUEST_CS_BASE 0x418
#define VMCB_GUEST_SS_SEL 0x420
#define VMCB_GUEST_SS_ATTR 0x422
#define VMCB_GUEST_SS_LIM 0x424
#define VMCB_GUEST_SS_BASE 0x428
#define VMCB_GUEST_DS_SEL 0x430
#define VMCB_GUEST_DS_ATTR 0x432
#define VMCB_GUEST_DS_LIM 0x434
#define VMCB_GUEST_DS_BASE 0x438
#define VMCB_GUEST_FS_SEL 0x440
#define VMCB_GUEST_FS_ATTR 0x442
#define VMCB_GUEST_FS_LIM 0x444
#define VMCB_GUEST_FS_BASE 0x448
#define VMCB_GUEST_GS_SEL 0x450
#define VMCB_GUEST_GS_ATTR 0x452
#define VMCB_GUEST_GS_LIM 0x454
#define VMCB_GUEST_GS_BASE 0x458

#define VMCB_GUEST_IDTR_SEL 0x480
#define VMCB_GUEST_IDTR_ATTR 0x482
#define VMCB_GUEST_IDTR_LIM 0x484
#define VMCB_GUEST_IDTR_BASE 0x488
#define VMCB_GUEST_GDTR_SEL 0x460
#define VMCB_GUEST_GDTR_ATTR 0x462
#define VMCB_GUEST_GDTR_LIM 0x464
#define VMCB_GUEST_GDTR_BASE 0x468
#define VMCB_GUEST_LDTR_SEL 0x470
#define VMCB_GUEST_LDTR_ATTR 0x472
#define VMCB_GUEST_LDTR_LIM 0x474
#define VMCB_GUEST_LDTR_BASE 0x478
#define VMCB_GUEST_TR_SEL 0x490
#define VMCB_GUEST_TR_ATTR 0x492
#define VMCB_GUEST_TR_LIM 0x494
#define VMCB_GUEST_TR_BASE 0x498

#define VMCB_GUEST_EFER 0x4d0
#define VMCB_GUEST_CR4 0x548
#define VMCB_GUEST_CR3 0x550
#define VMCB_GUEST_CR0 0x558
#define VMCB_GUEST_DR7 0x560
#define VMCB_GUEST_DR6 0x568
#define VMCB_GUEST_RFLAGS 0x570
#define VMCB_GUEST_RIP 0x578
#define VMCB_GUEST_RSP 0x5d8
#define VMCB_GUEST_PAT 0x668
#define VMCB_GUEST_DEBUGCTL 0x670
#define VMCB_RAX 0x5f8
#define SVM_ATTR_G (1 << 15)
#define SVM_ATTR_DB (1 << 14)
#define SVM_ATTR_L (1 << 13)
#define SVM_ATTR_P (1 << 7)
#define SVM_ATTR_S (1 << 4)
#define SVM_ATTR_TYPE_A (1 << 0)
#define SVM_ATTR_TYPE_RW (1 << 1)
#define SVM_ATTR_TYPE_E (1 << 3)
#define SVM_ATTR_TSS_BUSY 0x008b
#define SVM_ATTR_64BIT_CODE                                                    \
  (SVM_ATTR_P | SVM_ATTR_S | SVM_ATTR_TYPE_E | SVM_ATTR_TYPE_RW |              \
   SVM_ATTR_TYPE_A | SVM_ATTR_L | SVM_ATTR_G)
#define SVM_ATTR_64BIT_DATA                                                    \
  (SVM_ATTR_P | SVM_ATTR_S | SVM_ATTR_TYPE_RW | SVM_ATTR_TYPE_A |              \
   SVM_ATTR_DB | SVM_ATTR_G)

#define X86_NEXT_INSN $0xbadc0de
#define X86_PREFIX_SIZE 0xba1d

#define KVM_MAX_VCPU 4
#define KVM_MAX_L2_VMS 4
#define KVM_PAGE_SIZE (1 << 12)
#define KVM_GUEST_PAGES 1024
#define KVM_GUEST_MEM_SIZE (KVM_GUEST_PAGES * KVM_PAGE_SIZE)
#define SZ_4K 0x00001000
#define SZ_64K 0x00010000
#define GENMASK_ULL(h, l)                                                      \
  (((~0ULL) - (1ULL << (l)) + 1ULL) & (~0ULL >> (63 - (h))))

extern char* __start_guest;
static always_inline uintptr_t executor_fn_guest_addr(void* fn)
{
  volatile uintptr_t start = (uintptr_t)&__start_guest;
  volatile uintptr_t offset = SYZOS_ADDR_EXECUTOR_CODE;
  return (uintptr_t)fn - start + offset;
}

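// The host-side program is a packed sequence of api_call_* records, each
// starting with an api_call_header; these are the call numbers that
// guest_main() dispatches on.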
typedef enum {
  SYZOS_API_UEXIT = 0,
  SYZOS_API_CODE = 10,
  SYZOS_API_CPUID = 100,
  SYZOS_API_WRMSR = 101,
  SYZOS_API_RDMSR = 102,
  SYZOS_API_WR_CRN = 103,
  SYZOS_API_WR_DRN = 104,
  SYZOS_API_IN_DX = 105,
  SYZOS_API_OUT_DX = 106,
  SYZOS_API_SET_IRQ_HANDLER = 200,
  SYZOS_API_ENABLE_NESTED = 300,
  SYZOS_API_NESTED_CREATE_VM = 301,
  SYZOS_API_NESTED_LOAD_CODE = 302,
  SYZOS_API_NESTED_VMLAUNCH = 303,
  SYZOS_API_NESTED_VMRESUME = 304,
  SYZOS_API_NESTED_LOAD_SYZOS = 310,
  SYZOS_API_NESTED_INTEL_VMWRITE_MASK = 340,
  SYZOS_API_NESTED_AMD_VMCB_WRITE_MASK = 380,
  SYZOS_API_NESTED_AMD_INVLPGA = 381,
  SYZOS_API_NESTED_AMD_STGI = 382,
  SYZOS_API_NESTED_AMD_CLGI = 383,
  SYZOS_API_NESTED_AMD_INJECT_EVENT = 384,
  SYZOS_API_NESTED_AMD_SET_INTERCEPT = 385,
  SYZOS_API_NESTED_AMD_VMLOAD = 386,
  SYZOS_API_NESTED_AMD_VMSAVE = 387,
  SYZOS_API_STOP,
} syzos_api_id;

struct api_call_uexit {
  struct api_call_header header;
  uint64_t exit_code;
};

struct api_call_code {
  struct api_call_header header;
  uint8_t insns[];
};

struct api_call_nested_load_code {
  struct api_call_header header;
  uint64_t vm_id;
  uint8_t insns[];
};

struct api_call_nested_load_syzos {
  struct api_call_header header;
  uint64_t vm_id;
  uint64_t unused_pages;
  uint8_t program[];
};

struct api_call_cpuid {
  struct api_call_header header;
  uint32_t eax;
  uint32_t ecx;
};
struct l2_guest_regs {
  uint64_t rax, rbx, rcx, rdx, rsi, rdi, rbp;
  uint64_t r8, r9, r10, r11, r12, r13, r14, r15;
};
#define MEM_REGION_FLAG_USER_CODE (1 << 0)
#define MEM_REGION_FLAG_DIRTY_LOG (1 << 1)
#define MEM_REGION_FLAG_READONLY (1 << 2)
#define MEM_REGION_FLAG_EXECUTOR_CODE (1 << 3)
#define MEM_REGION_FLAG_GPA0 (1 << 5)
#define MEM_REGION_FLAG_NO_HOST_MEM (1 << 6)
#define MEM_REGION_FLAG_REMAINING (1 << 7)

struct mem_region {
  uint64_t gpa;
  int pages;
  uint32_t flags;
};

struct syzos_boot_args {
  uint32_t region_count;
  uint32_t reserved;
  struct mem_region regions[];
};

struct syzos_globals {
  uint64_t alloc_offset;
  uint64_t total_size;
  uint64_t text_sizes[KVM_MAX_VCPU];
  struct l2_guest_regs l2_ctx[KVM_MAX_VCPU][KVM_MAX_L2_VMS];
  uint64_t active_vm_id[KVM_MAX_VCPU];
};

GUEST_CODE static void guest_uexit(uint64_t exit_code);
GUEST_CODE static void nested_vm_exit_handler_intel(uint64_t exit_reason,
                                                    struct l2_guest_regs* regs);
GUEST_CODE static void nested_vm_exit_handler_amd(uint64_t exit_reason,
                                                  struct l2_guest_regs* regs);
GUEST_CODE static void guest_execute_code(uint8_t* insns, uint64_t size);
GUEST_CODE static void guest_handle_cpuid(uint32_t eax, uint32_t ecx);
GUEST_CODE static void guest_handle_wrmsr(uint64_t reg, uint64_t val);
GUEST_CODE static void guest_handle_rdmsr(uint64_t reg);
GUEST_CODE static void guest_handle_wr_crn(struct api_call_2* cmd);
GUEST_CODE static void guest_handle_wr_drn(struct api_call_2* cmd);
GUEST_CODE static void guest_handle_in_dx(struct api_call_2* cmd);
GUEST_CODE static void guest_handle_out_dx(struct api_call_3* cmd);
GUEST_CODE static void guest_handle_set_irq_handler(struct api_call_2* cmd);
GUEST_CODE static void guest_handle_enable_nested(struct api_call_1* cmd,
                                                  uint64_t cpu_id);
GUEST_CODE static void guest_handle_nested_create_vm(struct api_call_1* cmd,
                                                     uint64_t cpu_id);
GUEST_CODE static void
guest_handle_nested_load_code(struct api_call_nested_load_code* cmd,
                              uint64_t cpu_id);
GUEST_CODE static void
guest_handle_nested_load_syzos(struct api_call_nested_load_syzos* cmd,
                               uint64_t cpu_id);
GUEST_CODE static void guest_handle_nested_vmlaunch(struct api_call_1* cmd,
                                                    uint64_t cpu_id);
GUEST_CODE static void guest_handle_nested_vmresume(struct api_call_1* cmd,
                                                    uint64_t cpu_id);
GUEST_CODE static void
guest_handle_nested_intel_vmwrite_mask(struct api_call_5* cmd, uint64_t cpu_id);
GUEST_CODE static void
guest_handle_nested_amd_vmcb_write_mask(struct api_call_5* cmd,
                                        uint64_t cpu_id);
GUEST_CODE static void guest_handle_nested_amd_invlpga(struct api_call_2* cmd,
                                                       uint64_t cpu_id);
GUEST_CODE static void guest_handle_nested_amd_stgi();
GUEST_CODE static void guest_handle_nested_amd_clgi();
GUEST_CODE static void
guest_handle_nested_amd_inject_event(struct api_call_5* cmd, uint64_t cpu_id);
GUEST_CODE static void
guest_handle_nested_amd_set_intercept(struct api_call_5* cmd, uint64_t cpu_id);
GUEST_CODE static void guest_handle_nested_amd_vmload(struct api_call_1* cmd,
                                                      uint64_t cpu_id);
GUEST_CODE static void guest_handle_nested_amd_vmsave(struct api_call_1* cmd,
                                                      uint64_t cpu_id);

typedef enum {
  UEXIT_END = (uint64_t)-1,
  UEXIT_IRQ = (uint64_t)-2,
  UEXIT_ASSERT = (uint64_t)-3,
  UEXIT_INVALID_MAIN = (uint64_t)-4,
} uexit_code;

typedef enum {
  CPU_VENDOR_INTEL,
  CPU_VENDOR_AMD,
} cpu_vendor_id;

__attribute__((naked)) GUEST_CODE static void dummy_null_handler()
{
  asm("iretq");
}

__attribute__((naked)) GUEST_CODE static void uexit_irq_handler()
{
  asm volatile(R"(
	    movq $-2, %rdi
	    call guest_uexit
	    iretq
	)");
}
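// SyzOS guest entry point: walk this vCPU's program of api_call records and
// dispatch each call, then report UEXIT_END back to the host.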
__attribute__((used)) GUEST_CODE static void guest_main(uint64_t cpu)
{
  volatile struct syzos_globals* globals =
      (volatile struct syzos_globals*)X86_SYZOS_ADDR_GLOBALS;
  uint64_t size = globals->text_sizes[cpu];
  uint64_t addr = X86_SYZOS_ADDR_USER_CODE + cpu * KVM_PAGE_SIZE;
  while (size >= sizeof(struct api_call_header)) {
    struct api_call_header* cmd = (struct api_call_header*)addr;
    volatile uint64_t call = cmd->call;
    if ((call >= SYZOS_API_STOP) || (cmd->size > size)) {
      guest_uexit(UEXIT_INVALID_MAIN);
      return;
    }
    if (call == SYZOS_API_UEXIT) {
      struct api_call_uexit* ucmd = (struct api_call_uexit*)cmd;
      guest_uexit(ucmd->exit_code);
    } else if (call == SYZOS_API_CODE) {
      struct api_call_code* ccmd = (struct api_call_code*)cmd;
      guest_execute_code(ccmd->insns,
                         cmd->size - sizeof(struct api_call_header));
    } else if (call == SYZOS_API_CPUID) {
      struct api_call_cpuid* ccmd = (struct api_call_cpuid*)cmd;
      guest_handle_cpuid(ccmd->eax, ccmd->ecx);
    } else if (call == SYZOS_API_WRMSR) {
      struct api_call_2* ccmd = (struct api_call_2*)cmd;
      guest_handle_wrmsr(ccmd->args[0], ccmd->args[1]);
    } else if (call == SYZOS_API_RDMSR) {
      struct api_call_1* ccmd = (struct api_call_1*)cmd;
      guest_handle_rdmsr(ccmd->arg);
    } else if (call == SYZOS_API_WR_CRN) {
      guest_handle_wr_crn((struct api_call_2*)cmd);
    } else if (call == SYZOS_API_WR_DRN) {
      guest_handle_wr_drn((struct api_call_2*)cmd);
    } else if (call == SYZOS_API_IN_DX) {
      guest_handle_in_dx((struct api_call_2*)cmd);
    } else if (call == SYZOS_API_OUT_DX) {
      guest_handle_out_dx((struct api_call_3*)cmd);
    } else if (call == SYZOS_API_SET_IRQ_HANDLER) {
      guest_handle_set_irq_handler((struct api_call_2*)cmd);
    } else if (call == SYZOS_API_ENABLE_NESTED) {
      guest_handle_enable_nested((struct api_call_1*)cmd, cpu);
    } else if (call == SYZOS_API_NESTED_CREATE_VM) {
      guest_handle_nested_create_vm((struct api_call_1*)cmd, cpu);
    } else if (call == SYZOS_API_NESTED_LOAD_CODE) {
      guest_handle_nested_load_code((struct api_call_nested_load_code*)cmd,
                                    cpu);
    } else if (call == SYZOS_API_NESTED_LOAD_SYZOS) {
      guest_handle_nested_load_syzos((struct api_call_nested_load_syzos*)cmd,
                                     cpu);
    } else if (call == SYZOS_API_NESTED_VMLAUNCH) {
      guest_handle_nested_vmlaunch((struct api_call_1*)cmd, cpu);
    } else if (call == SYZOS_API_NESTED_VMRESUME) {
      guest_handle_nested_vmresume((struct api_call_1*)cmd, cpu);
    } else if (call == SYZOS_API_NESTED_INTEL_VMWRITE_MASK) {
      guest_handle_nested_intel_vmwrite_mask((struct api_call_5*)cmd, cpu);
    } else if (call == SYZOS_API_NESTED_AMD_VMCB_WRITE_MASK) {
      guest_handle_nested_amd_vmcb_write_mask((struct api_call_5*)cmd, cpu);
    } else if (call == SYZOS_API_NESTED_AMD_INVLPGA) {
      guest_handle_nested_amd_invlpga((struct api_call_2*)cmd, cpu);
    } else if (call == SYZOS_API_NESTED_AMD_STGI) {
      guest_handle_nested_amd_stgi();
    } else if (call == SYZOS_API_NESTED_AMD_CLGI) {
      guest_handle_nested_amd_clgi();
    } else if (call == SYZOS_API_NESTED_AMD_INJECT_EVENT) {
      guest_handle_nested_amd_inject_event((struct api_call_5*)cmd, cpu);
    } else if (call == SYZOS_API_NESTED_AMD_SET_INTERCEPT) {
      guest_handle_nested_amd_set_intercept((struct api_call_5*)cmd, cpu);
    } else if (call == SYZOS_API_NESTED_AMD_VMLOAD) {
      guest_handle_nested_amd_vmload((struct api_call_1*)cmd, cpu);
    } else if (call == SYZOS_API_NESTED_AMD_VMSAVE) {
      guest_handle_nested_amd_vmsave((struct api_call_1*)cmd, cpu);
    }
    addr += cmd->size;
    size -= cmd->size;
  }
  guest_uexit(UEXIT_END);
}

GUEST_CODE static noinline void guest_execute_code(uint8_t* insns,
                                                   uint64_t size)
{
  volatile void (*fn)() = (volatile void (*)())insns;
  fn();
}
__attribute__((used)) GUEST_CODE static noinline void
guest_uexit(uint64_t exit_code)
{
  volatile uint64_t* ptr = (volatile uint64_t*)X86_SYZOS_ADDR_UEXIT;
  asm volatile("movq %0, (%1)" ::"a"(exit_code), "r"(ptr) : "memory");
}

GUEST_CODE static noinline void guest_handle_cpuid(uint32_t eax, uint32_t ecx)
{
  asm volatile("cpuid\n" : : "a"(eax), "c"(ecx) : "rbx", "rdx");
}

GUEST_CODE static noinline void wrmsr(uint64_t reg, uint64_t val)
{
  asm volatile("wrmsr"
               :
               : "c"(reg), "a"((uint32_t)val), "d"((uint32_t)(val >> 32))
               : "memory");
}
GUEST_CODE static noinline void guest_handle_wrmsr(uint64_t reg, uint64_t val)
{
  wrmsr(reg, val);
}

GUEST_CODE static noinline uint64_t rdmsr(uint64_t msr_id)
{
  uint32_t low = 0, high = 0;
  asm volatile("rdmsr" : "=a"(low), "=d"(high) : "c"(msr_id));
  return ((uint64_t)high << 32) | low;
}
GUEST_CODE static noinline void guest_handle_rdmsr(uint64_t reg)
{
  (void)rdmsr(reg);
}
GUEST_CODE static noinline void guest_handle_wr_crn(struct api_call_2* cmd)
{
  uint64_t value = cmd->args[1];
  volatile uint64_t reg = cmd->args[0];
  if (reg == 0) {
    asm volatile("movq %0, %%cr0" ::"r"(value) : "memory");
    return;
  }
  if (reg == 2) {
    asm volatile("movq %0, %%cr2" ::"r"(value) : "memory");
    return;
  }
  if (reg == 3) {
    asm volatile("movq %0, %%cr3" ::"r"(value) : "memory");
    return;
  }
  if (reg == 4) {
    asm volatile("movq %0, %%cr4" ::"r"(value) : "memory");
    return;
  }
  if (reg == 8) {
    asm volatile("movq %0, %%cr8" ::"r"(value) : "memory");
    return;
  }
}
GUEST_CODE static noinline void guest_handle_wr_drn(struct api_call_2* cmd)
{
  uint64_t value = cmd->args[1];
  volatile uint64_t reg = cmd->args[0];
  if (reg == 0) {
    asm volatile("movq %0, %%dr0" ::"r"(value) : "memory");
    return;
  }
  if (reg == 1) {
    asm volatile("movq %0, %%dr1" ::"r"(value) : "memory");
    return;
  }
  if (reg == 2) {
    asm volatile("movq %0, %%dr2" ::"r"(value) : "memory");
    return;
  }
  if (reg == 3) {
    asm volatile("movq %0, %%dr3" ::"r"(value) : "memory");
    return;
  }
  if (reg == 4) {
    asm volatile("movq %0, %%dr4" ::"r"(value) : "memory");
    return;
  }
  if (reg == 5) {
    asm volatile("movq %0, %%dr5" ::"r"(value) : "memory");
    return;
  }
  if (reg == 6) {
    asm volatile("movq %0, %%dr6" ::"r"(value) : "memory");
    return;
  }
  if (reg == 7) {
    asm volatile("movq %0, %%dr7" ::"r"(value) : "memory");
    return;
  }
}
GUEST_CODE static noinline void guest_handle_in_dx(struct api_call_2* cmd)
{
  uint16_t port = cmd->args[0];
  volatile int size = cmd->args[1];
  if (size == 1) {
    uint8_t unused;
    asm volatile("inb %1, %0" : "=a"(unused) : "d"(port));
    return;
  }
  if (size == 2) {
    uint16_t unused;
    asm volatile("inw %1, %0" : "=a"(unused) : "d"(port));
    return;
  }
  if (size == 4) {
    uint32_t unused;
    asm volatile("inl %1, %0" : "=a"(unused) : "d"(port));
  }
  return;
}
GUEST_CODE static noinline void guest_handle_out_dx(struct api_call_3* cmd)
{
  uint16_t port = cmd->args[0];
  volatile int size = cmd->args[1];
  uint32_t data = (uint32_t)cmd->args[2];
  if (size == 1) {
    asm volatile("outb %b0, %w1" ::"a"(data), "d"(port));
    return;
  }
  if (size == 2) {
    asm volatile("outw %w0, %w1" ::"a"(data), "d"(port));
    return;
  }
  if (size == 4) {
    asm volatile("outl %k0, %w1" ::"a"(data), "d"(port));
    return;
  }
}
struct idt_entry_64 {
  uint16_t offset_low;
  uint16_t selector;
  uint8_t ist;
  uint8_t type_attr;
  uint16_t offset_mid;
  uint32_t offset_high;
  uint32_t reserved;
} __attribute__((packed));
GUEST_CODE static void set_idt_gate(uint8_t vector, uint64_t handler)
{
  volatile struct idt_entry_64* idt =
      (volatile struct idt_entry_64*)(X86_SYZOS_ADDR_VAR_IDT);
  volatile struct idt_entry_64* idt_entry = &idt[vector];
  idt_entry->offset_low = (uint16_t)handler;
  idt_entry->offset_mid = (uint16_t)(handler >> 16);
  idt_entry->offset_high = (uint32_t)(handler >> 32);
  idt_entry->selector = X86_SYZOS_SEL_CODE;
  idt_entry->type_attr = 0x8E;
  idt_entry->ist = 0;
  idt_entry->reserved = 0;
}

GUEST_CODE static noinline void
guest_handle_set_irq_handler(struct api_call_2* cmd)
{
  uint8_t vector = (uint8_t)cmd->args[0];
  uint64_t type = cmd->args[1];
  volatile uint64_t handler_addr = 0;
  if (type == 1)
    handler_addr = executor_fn_guest_addr(dummy_null_handler);
  else if (type == 2)
    handler_addr = executor_fn_guest_addr(uexit_irq_handler);
  set_idt_gate(vector, handler_addr);
}

GUEST_CODE static cpu_vendor_id get_cpu_vendor(void)
{
  uint32_t ebx, eax = 0;
  asm volatile("cpuid" : "+a"(eax), "=b"(ebx) : : "ecx", "edx");
  if (ebx == 0x756e6547) {
    return CPU_VENDOR_INTEL;
  } else if (ebx == 0x68747541) {
    return CPU_VENDOR_AMD;
  } else {
    guest_uexit(UEXIT_ASSERT);
    return CPU_VENDOR_INTEL;
  }
}

GUEST_CODE static inline uint64_t read_cr0(void)
{
  uint64_t val;
  asm volatile("mov %%cr0, %0" : "=r"(val));
  return val;
}

GUEST_CODE static inline uint64_t read_cr3(void)
{
  uint64_t val;
  asm volatile("mov %%cr3, %0" : "=r"(val));
  return val;
}

GUEST_CODE static inline uint64_t read_cr4(void)
{
  uint64_t val;
  asm volatile("mov %%cr4, %0" : "=r"(val));
  return val;
}

GUEST_CODE static inline void write_cr4(uint64_t val)
{
  asm volatile("mov %0, %%cr4" : : "r"(val));
}

GUEST_CODE static noinline void vmwrite(uint64_t field, uint64_t value)
{
  uint8_t error = 0;
  asm volatile("vmwrite %%rax, %%rbx; setna %0"
               : "=q"(error)
               : "a"(value), "b"(field)
               : "cc", "memory");
  if (error)
    guest_uexit(UEXIT_ASSERT);
}

GUEST_CODE static noinline uint64_t vmread(uint64_t field)
{
  uint64_t value;
  asm volatile("vmread %%rbx, %%rax" : "=a"(value) : "b"(field) : "cc");
  return value;
}

GUEST_CODE static inline void nested_vmptrld(uint64_t cpu_id, uint64_t vm_id)
{
  uint64_t vmcs_addr = X86_SYZOS_ADDR_VMCS_VMCB(cpu_id, vm_id);
  uint8_t error = 0;
  asm volatile("vmptrld %1; setna %0"
               : "=q"(error)
               : "m"(vmcs_addr)
               : "memory", "cc");
  if (error)
    guest_uexit(0xE2BAD2);
}

GUEST_CODE static noinline void vmcb_write16(uint64_t vmcb, uint16_t offset,
                                             uint16_t val)
{
  *((volatile uint16_t*)(vmcb + offset)) = val;
}

GUEST_CODE static noinline void vmcb_write32(uint64_t vmcb, uint16_t offset,
                                             uint32_t val)
{
  *((volatile uint32_t*)(vmcb + offset)) = val;
}

GUEST_CODE static noinline uint32_t vmcb_read32(uint64_t vmcb, uint16_t offset)
{
  return *((volatile uint32_t*)(vmcb + offset));
}

GUEST_CODE static noinline void vmcb_write64(uint64_t vmcb, uint16_t offset,
                                             uint64_t val)
{
  *((volatile uint64_t*)(vmcb + offset)) = val;
}

GUEST_CODE static noinline uint64_t vmcb_read64(volatile uint8_t* vmcb,
                                                uint16_t offset)
{
  return *((volatile uint64_t*)(vmcb + offset));
}

GUEST_CODE static void guest_memset(void* s, uint8_t c, int size)
{
  volatile uint8_t* p = (volatile uint8_t*)s;
  for (int i = 0; i < size; i++)
    p[i] = c;
}

GUEST_CODE static void guest_memcpy(void* dst, void* src, int size)
{
  volatile uint8_t* d = (volatile uint8_t*)dst;
  volatile uint8_t* s = (volatile uint8_t*)src;
  for (int i = 0; i < size; i++)
    d[i] = s[i];
}

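// Enter VMX root operation: set CR4.VMXE, make sure IA32_FEATURE_CONTROL
// permits VMXON, then execute VMXON on this vCPU's scratch region.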
GUEST_CODE static noinline void nested_enable_vmx_intel(uint64_t cpu_id)
{
  uint64_t vmxon_addr = X86_SYZOS_ADDR_VM_ARCH_SPECIFIC(cpu_id);
  uint64_t cr4 = read_cr4();
  cr4 |= X86_CR4_VMXE;
  write_cr4(cr4);
  uint64_t feature_control = rdmsr(X86_MSR_IA32_FEATURE_CONTROL);
  if ((feature_control & 1) == 0) {
    feature_control |= 0b101;
    asm volatile("wrmsr"
                 :
                 : "d"(0x0), "c"(X86_MSR_IA32_FEATURE_CONTROL),
                   "A"(feature_control));
  }
  *(uint32_t*)vmxon_addr = rdmsr(X86_MSR_IA32_VMX_BASIC);
  uint8_t error;
  asm volatile("vmxon %1; setna %0"
               : "=q"(error)
               : "m"(vmxon_addr)
               : "memory", "cc");
  if (error) {
    guest_uexit(0xE2BAD0);
    return;
  }
}

GUEST_CODE static noinline void nested_enable_svm_amd(uint64_t cpu_id)
{
  uint64_t hsave_addr = X86_SYZOS_ADDR_VM_ARCH_SPECIFIC(cpu_id);
  uint64_t efer = rdmsr(X86_MSR_IA32_EFER);
  efer |= X86_EFER_SVME;
  wrmsr(X86_MSR_IA32_EFER, efer);
  wrmsr(X86_MSR_VM_HSAVE_PA, hsave_addr);
}

GUEST_CODE static noinline void
guest_handle_enable_nested(struct api_call_1* cmd, uint64_t cpu_id)
{
  if (get_cpu_vendor() == CPU_VENDOR_INTEL) {
    nested_enable_vmx_intel(cpu_id);
  } else {
    nested_enable_svm_amd(cpu_id);
  }
}
GUEST_CODE static uint64_t get_unused_memory_size()
{
  volatile struct syzos_boot_args* args =
      (volatile struct syzos_boot_args*)X86_SYZOS_ADDR_BOOT_ARGS;
  for (uint32_t i = 0; i < args->region_count; i++) {
    if (args->regions[i].gpa == X86_SYZOS_ADDR_UNUSED)
      return args->regions[i].pages * KVM_PAGE_SIZE;
  }
  return 0;
}
GUEST_CODE static uint64_t guest_alloc_page()
{
  volatile struct syzos_globals* globals =
      (volatile struct syzos_globals*)X86_SYZOS_ADDR_GLOBALS;
  if (globals->total_size == 0) {
    uint64_t size = get_unused_memory_size();
    __sync_val_compare_and_swap(&globals->total_size, 0, size);
  }
  uint64_t offset = __sync_fetch_and_add(&globals->alloc_offset, KVM_PAGE_SIZE);
  if (offset >= globals->total_size)
    guest_uexit(UEXIT_ASSERT);
  uint64_t ptr = X86_SYZOS_ADDR_UNUSED + offset;
  guest_memset((void*)ptr, 0, KVM_PAGE_SIZE);
  return ptr;
}
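// Build the 4-level translation used for the L2 guest (EPT on Intel, NPT on
// AMD), allocating intermediate tables from the "unused" memory region.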
GUEST_CODE static void l2_map_page(uint64_t cpu_id, uint64_t vm_id,
                                   uint64_t gpa, uint64_t host_pa,
                                   uint64_t flags)
{
  uint64_t pml4_addr = X86_SYZOS_ADDR_VM_PGTABLE(cpu_id, vm_id);
  volatile uint64_t* pml4 = (volatile uint64_t*)pml4_addr;
  uint64_t pml4_idx = (gpa >> 39) & 0x1FF;
  if (!(pml4[pml4_idx] & X86_PDE64_PRESENT)) {
    uint64_t page = guest_alloc_page();
    pml4[pml4_idx] = page | X86_PDE64_PRESENT | X86_PDE64_RW | X86_PDE64_USER;
  }
  volatile uint64_t* pdpt = (volatile uint64_t*)(pml4[pml4_idx] & ~0xFFF);
  uint64_t pdpt_idx = (gpa >> 30) & 0x1FF;
  if (!(pdpt[pdpt_idx] & X86_PDE64_PRESENT)) {
    uint64_t page = guest_alloc_page();
    pdpt[pdpt_idx] = page | X86_PDE64_PRESENT | X86_PDE64_RW | X86_PDE64_USER;
  }
  volatile uint64_t* pd = (volatile uint64_t*)(pdpt[pdpt_idx] & ~0xFFF);
  uint64_t pd_idx = (gpa >> 21) & 0x1FF;
  if (!(pd[pd_idx] & X86_PDE64_PRESENT)) {
    uint64_t page = guest_alloc_page();
    pd[pd_idx] = page | X86_PDE64_PRESENT | X86_PDE64_RW | X86_PDE64_USER;
  }
  volatile uint64_t* pt = (volatile uint64_t*)(pd[pd_idx] & ~0xFFF);
  uint64_t pt_idx = (gpa >> 12) & 0x1FF;
  if (!(pt[pt_idx] & X86_PDE64_PRESENT))
    pt[pt_idx] = (host_pa & ~0xFFF) | flags;
}

GUEST_CODE static noinline void setup_l2_page_tables(cpu_vendor_id vendor,
                                                     uint64_t cpu_id,
                                                     uint64_t vm_id,
                                                     uint64_t unused_pages)
{
  uint64_t flags = X86_PDE64_PRESENT | X86_PDE64_RW | X86_PDE64_USER;
  if (vendor == CPU_VENDOR_INTEL) {
    flags |= EPT_MEMTYPE_WB | EPT_ACCESSED | EPT_DIRTY;
  } else {
    flags |= X86_PDE64_ACCESSED | X86_PDE64_DIRTY;
  }
  volatile struct syzos_boot_args* args =
      (volatile struct syzos_boot_args*)X86_SYZOS_ADDR_BOOT_ARGS;
  for (uint32_t i = 0; i < args->region_count; i++) {
    struct mem_region r;
    r.gpa = args->regions[i].gpa;
    r.pages = args->regions[i].pages;
    r.flags = args->regions[i].flags;
    if (r.flags & MEM_REGION_FLAG_NO_HOST_MEM)
      continue;
    if (r.flags & MEM_REGION_FLAG_REMAINING) {
      r.pages = (unused_pages < 16) ? 16 : unused_pages;
    }
    for (int p = 0; p < r.pages; p++) {
      uint64_t gpa = r.gpa + (p * KVM_PAGE_SIZE);
      uint64_t backing;
      if (r.gpa == X86_SYZOS_ADDR_USER_CODE && p == 0) {
        backing = X86_SYZOS_ADDR_VM_CODE(cpu_id, vm_id);
      } else if (r.gpa == X86_SYZOS_ADDR_STACK_BOTTOM) {
        backing = X86_SYZOS_ADDR_VM_STACK(cpu_id, vm_id);
      } else {
        backing = gpa;
      }
      l2_map_page(cpu_id, vm_id, gpa, backing, flags);
    }
  }
}

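// Program the VMCS execution/exit/entry controls from the TRUE_*_CTLS MSRs,
// enable EPT, and force HLT/RDTSC exiting.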
GUEST_CODE static noinline void init_vmcs_control_fields(uint64_t cpu_id,
                                                         uint64_t vm_id)
{
  uint64_t vmx_msr = rdmsr(X86_MSR_IA32_VMX_TRUE_PINBASED_CTLS);
  vmwrite(VMCS_PIN_BASED_VM_EXEC_CONTROL, (uint32_t)vmx_msr);
  vmx_msr = (uint32_t)rdmsr(X86_MSR_IA32_VMX_PROCBASED_CTLS2);
  vmx_msr |= SECONDARY_EXEC_ENABLE_EPT | SECONDARY_EXEC_ENABLE_RDTSCP;
  vmwrite(VMCS_SECONDARY_VM_EXEC_CONTROL, vmx_msr);
  vmx_msr = rdmsr(X86_MSR_IA32_VMX_TRUE_PROCBASED_CTLS);
  vmx_msr |= CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
  vmx_msr |= CPU_BASED_HLT_EXITING | CPU_BASED_RDTSC_EXITING;
  vmwrite(VMCS_CPU_BASED_VM_EXEC_CONTROL, (uint32_t)vmx_msr);
  vmx_msr = rdmsr(X86_MSR_IA32_VMX_TRUE_EXIT_CTLS);
  vmwrite(VMCS_VM_EXIT_CONTROLS,
          (uint32_t)vmx_msr | VM_EXIT_HOST_ADDR_SPACE_SIZE);
  vmx_msr = rdmsr(X86_MSR_IA32_VMX_TRUE_ENTRY_CTLS);
  vmwrite(VMCS_VM_ENTRY_CONTROLS, (uint32_t)vmx_msr | VM_ENTRY_IA32E_MODE);
  uint64_t eptp =
      (X86_SYZOS_ADDR_VM_PGTABLE(cpu_id, vm_id) & ~0xFFF) | (6 << 0) | (3 << 3);
  vmwrite(VMCS_EPT_POINTER, eptp);
  vmwrite(VMCS_CR0_GUEST_HOST_MASK, 0);
  vmwrite(VMCS_CR4_GUEST_HOST_MASK, 0);
  vmwrite(VMCS_CR0_READ_SHADOW, read_cr0());
  vmwrite(VMCS_CR4_READ_SHADOW, read_cr4());
  vmwrite(VMCS_MSR_BITMAP, 0);
  vmwrite(VMCS_VMREAD_BITMAP, 0);
  vmwrite(VMCS_VMWRITE_BITMAP, 0);
  vmwrite(VMCS_EXCEPTION_BITMAP, (1 << 6));
  vmwrite(VMCS_VIRTUAL_PROCESSOR_ID, 0);
  vmwrite(VMCS_POSTED_INTR_NV, 0);
  vmwrite(VMCS_PAGE_FAULT_ERROR_CODE_MASK, 0);
  vmwrite(VMCS_PAGE_FAULT_ERROR_CODE_MATCH, -1);
  vmwrite(VMCS_CR3_TARGET_COUNT, 0);
  vmwrite(VMCS_VM_EXIT_MSR_STORE_COUNT, 0);
  vmwrite(VMCS_VM_EXIT_MSR_LOAD_COUNT, 0);
  vmwrite(VMCS_VM_ENTRY_MSR_LOAD_COUNT, 0);
  vmwrite(VMCS_VM_ENTRY_INTR_INFO_FIELD, 0);
  vmwrite(VMCS_TPR_THRESHOLD, 0);
}
typedef enum {
  SYZOS_NESTED_EXIT_REASON_HLT = 1,
  SYZOS_NESTED_EXIT_REASON_INVD = 2,
  SYZOS_NESTED_EXIT_REASON_CPUID = 3,
  SYZOS_NESTED_EXIT_REASON_RDTSC = 4,
  SYZOS_NESTED_EXIT_REASON_RDTSCP = 5,
  SYZOS_NESTED_EXIT_REASON_EPT_VIOLATION = 6,
  SYZOS_NESTED_EXIT_REASON_UNKNOWN = 0xFF,
} syz_nested_exit_reason;

GUEST_CODE static void handle_nested_uexit(uint64_t exit_code)
{
  uint64_t level = (exit_code >> 56) + 1;
  exit_code = (exit_code & 0x00FFFFFFFFFFFFFFULL) | (level << 56);
  guest_uexit(exit_code);
}

GUEST_CODE static void guest_uexit_l2(uint64_t exit_reason,
                                      syz_nested_exit_reason mapped_reason,
                                      cpu_vendor_id vendor)
{
  if (mapped_reason != SYZOS_NESTED_EXIT_REASON_UNKNOWN) {
    guest_uexit(0xe2e20000 | mapped_reason);
  } else if (vendor == CPU_VENDOR_INTEL) {
    guest_uexit(0xe2110000 | exit_reason);
  } else {
    guest_uexit(0xe2aa0000 | exit_reason);
  }
}

#define EXIT_REASON_CPUID 0xa
#define EXIT_REASON_HLT 0xc
#define EXIT_REASON_INVD 0xd
#define EXIT_REASON_EPT_VIOLATION 0x30
#define EXIT_REASON_RDTSC 0x10
#define EXIT_REASON_RDTSCP 0x33

GUEST_CODE static syz_nested_exit_reason
map_intel_exit_reason(uint64_t basic_reason)
{
  volatile uint64_t reason = basic_reason;
  if (reason == EXIT_REASON_HLT)
    return SYZOS_NESTED_EXIT_REASON_HLT;
  if (reason == EXIT_REASON_INVD)
    return SYZOS_NESTED_EXIT_REASON_INVD;
  if (reason == EXIT_REASON_CPUID)
    return SYZOS_NESTED_EXIT_REASON_CPUID;
  if (reason == EXIT_REASON_RDTSC)
    return SYZOS_NESTED_EXIT_REASON_RDTSC;
  if (reason == EXIT_REASON_RDTSCP)
    return SYZOS_NESTED_EXIT_REASON_RDTSCP;
  if (reason == EXIT_REASON_EPT_VIOLATION)
    return SYZOS_NESTED_EXIT_REASON_EPT_VIOLATION;
  return SYZOS_NESTED_EXIT_REASON_UNKNOWN;
}

GUEST_CODE static void advance_l2_rip_intel(uint64_t basic_reason)
{
  volatile uint64_t reason = basic_reason;
  uint64_t rip = vmread(VMCS_GUEST_RIP);
  if ((reason == EXIT_REASON_INVD) || (reason == EXIT_REASON_CPUID) ||
      (reason == EXIT_REASON_RDTSC)) {
    rip += 2;
  } else if (reason == EXIT_REASON_RDTSCP) {
    rip += 3;
  }
  vmwrite(VMCS_GUEST_RIP, rip);
}
__attribute__((used)) GUEST_CODE static void
nested_vm_exit_handler_intel(uint64_t exit_reason, struct l2_guest_regs* regs)
{
  volatile struct syzos_globals* globals =
      (volatile struct syzos_globals*)X86_SYZOS_ADDR_GLOBALS;
  uint64_t cpu_id =
      *(uint64_t*)((char*)regs + sizeof(struct l2_guest_regs) + 7 * 8);
  uint64_t vm_id = globals->active_vm_id[cpu_id];
  guest_memcpy((void*)&globals->l2_ctx[cpu_id][vm_id], regs,
               sizeof(struct l2_guest_regs));
  uint64_t basic_reason = exit_reason & 0xFFFF;
  if (basic_reason == EXIT_REASON_EPT_VIOLATION) {
    uint64_t gpa = vmread(VMCS_GUEST_PHYSICAL_ADDRESS);
    if ((gpa & ~0xFFF) == X86_SYZOS_ADDR_EXIT) {
      handle_nested_uexit(regs->rax);
      vmwrite(VMCS_GUEST_RIP, vmread(VMCS_GUEST_RIP) + 3);
      return;
    }
  }
  syz_nested_exit_reason mapped_reason = map_intel_exit_reason(basic_reason);
  guest_uexit_l2(exit_reason, mapped_reason, CPU_VENDOR_INTEL);
  advance_l2_rip_intel(basic_reason);
}

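// L1-side VM-exit path for Intel: the naked stub below saves the L2 GPRs,
// reads the exit reason, calls nested_vm_exit_handler_intel() and then jumps
// back to after_vmentry_label.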
extern char after_vmentry_label;
__attribute__((naked)) GUEST_CODE static void
nested_vm_exit_handler_intel_asm(void)
{
  asm volatile(R"(
      push %%r15
      push %%r14
      push %%r13
      push %%r12
      push %%r11
      push %%r10
      push %%r9
      push %%r8
      push %%rbp
      push %%rdi
      push %%rsi
      push %%rdx
      push %%rcx
      push %%rbx
      push %%rax
      mov %%rsp, %%rsi
      mov %[vm_exit_reason], %%rbx
      vmread %%rbx, %%rdi
      call nested_vm_exit_handler_intel
      add %[l2_regs_size], %%rsp
      pop %%r15
      pop %%r14
      pop %%r13
      pop %%r12
      pop %%rbp
      pop %%rbx
      add $16, %%rsp
      add $128, %%rsp
      jmp after_vmentry_label
	)"
               :
               : [l2_regs_size] "i"(sizeof(struct l2_guest_regs)),
                 [vm_exit_reason] "i"(VMCS_VM_EXIT_REASON)
               : "memory", "cc", "rbx", "rdi", "rsi");
}

#define VMEXIT_RDTSC 0x6e
#define VMEXIT_CPUID 0x72
#define VMEXIT_INVD 0x76
#define VMEXIT_HLT 0x78
#define VMEXIT_NPF 0x400
#define VMEXIT_RDTSCP 0x87

GUEST_CODE static syz_nested_exit_reason
map_amd_exit_reason(uint64_t basic_reason)
{
  volatile uint64_t reason = basic_reason;
  if (reason == VMEXIT_HLT)
    return SYZOS_NESTED_EXIT_REASON_HLT;
  if (reason == VMEXIT_INVD)
    return SYZOS_NESTED_EXIT_REASON_INVD;
  if (reason == VMEXIT_CPUID)
    return SYZOS_NESTED_EXIT_REASON_CPUID;
  if (reason == VMEXIT_RDTSC)
    return SYZOS_NESTED_EXIT_REASON_RDTSC;
  if (reason == VMEXIT_RDTSCP)
    return SYZOS_NESTED_EXIT_REASON_RDTSCP;
  if (reason == VMEXIT_NPF)
    return SYZOS_NESTED_EXIT_REASON_EPT_VIOLATION;
  return SYZOS_NESTED_EXIT_REASON_UNKNOWN;
}

GUEST_CODE static void advance_l2_rip_amd(uint64_t basic_reason,
                                          uint64_t cpu_id, uint64_t vm_id)
{
  volatile uint64_t reason = basic_reason;
  uint64_t vmcb_addr = X86_SYZOS_ADDR_VMCS_VMCB(cpu_id, vm_id);
  uint64_t rip = vmcb_read64((volatile uint8_t*)vmcb_addr, VMCB_GUEST_RIP);
  if ((reason == VMEXIT_INVD) || (reason == VMEXIT_CPUID) ||
      (reason == VMEXIT_RDTSC)) {
    rip += 2;
  } else if (reason == VMEXIT_RDTSCP) {
    rip += 3;
  }
  vmcb_write64(vmcb_addr, VMCB_GUEST_RIP, rip);
}

__attribute__((used)) GUEST_CODE static void
nested_vm_exit_handler_amd(uint64_t exit_reason, struct l2_guest_regs* regs)
{
  volatile struct syzos_globals* globals =
      (volatile struct syzos_globals*)X86_SYZOS_ADDR_GLOBALS;
  uint64_t cpu_id =
      *(uint64_t*)((char*)regs + sizeof(struct l2_guest_regs) + 8 * 8);
  uint64_t vm_id = globals->active_vm_id[cpu_id];
  guest_memcpy((void*)&globals->l2_ctx[cpu_id][vm_id], regs,
               sizeof(struct l2_guest_regs));
  volatile uint64_t basic_reason = exit_reason & 0xFFFF;
  if (basic_reason == VMEXIT_NPF) {
    uint64_t vmcb_addr = X86_SYZOS_ADDR_VMCS_VMCB(cpu_id, vm_id);
    uint64_t fault_gpa =
        vmcb_read64((volatile uint8_t*)vmcb_addr, VMCB_EXITINFO2);
    if ((fault_gpa & ~0xFFF) == X86_SYZOS_ADDR_EXIT) {
      handle_nested_uexit(regs->rax);
      uint64_t rip = vmcb_read64((volatile uint8_t*)vmcb_addr, VMCB_GUEST_RIP);
      vmcb_write64(vmcb_addr, VMCB_GUEST_RIP, rip + 3);
      return;
    }
  }
  syz_nested_exit_reason mapped_reason = map_amd_exit_reason(basic_reason);
  guest_uexit_l2(exit_reason, mapped_reason, CPU_VENDOR_AMD);
  advance_l2_rip_amd(basic_reason, cpu_id, vm_id);
}

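// VMCS host-state area: the values the CPU loads on VM-exit, mirroring the
// current L1 state with RIP pointed at the exit handler stub above.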
GUEST_CODE static noinline void init_vmcs_host_state(void)
{
  vmwrite(VMCS_HOST_CS_SELECTOR, X86_SYZOS_SEL_CODE);
  vmwrite(VMCS_HOST_DS_SELECTOR, X86_SYZOS_SEL_DATA);
  vmwrite(VMCS_HOST_ES_SELECTOR, X86_SYZOS_SEL_DATA);
  vmwrite(VMCS_HOST_SS_SELECTOR, X86_SYZOS_SEL_DATA);
  vmwrite(VMCS_HOST_FS_SELECTOR, X86_SYZOS_SEL_DATA);
  vmwrite(VMCS_HOST_GS_SELECTOR, X86_SYZOS_SEL_DATA);
  vmwrite(VMCS_HOST_TR_SELECTOR, X86_SYZOS_SEL_TSS64);
  vmwrite(VMCS_HOST_TR_BASE, X86_SYZOS_ADDR_VAR_TSS);
  vmwrite(VMCS_HOST_GDTR_BASE, X86_SYZOS_ADDR_GDT);
  vmwrite(VMCS_HOST_IDTR_BASE, X86_SYZOS_ADDR_VAR_IDT);
  vmwrite(VMCS_HOST_FS_BASE, rdmsr(X86_MSR_FS_BASE));
  vmwrite(VMCS_HOST_GS_BASE, rdmsr(X86_MSR_GS_BASE));
  vmwrite(VMCS_HOST_RIP, (uintptr_t)nested_vm_exit_handler_intel_asm);
  vmwrite(VMCS_HOST_CR0, read_cr0());
  vmwrite(VMCS_HOST_CR3, read_cr3());
  vmwrite(VMCS_HOST_CR4, read_cr4());
  vmwrite(VMCS_HOST_IA32_PAT, rdmsr(X86_MSR_IA32_CR_PAT));
  vmwrite(VMCS_HOST_IA32_EFER, rdmsr(X86_MSR_IA32_EFER));
  vmwrite(VMCS_HOST_IA32_PERF_GLOBAL_CTRL,
          rdmsr(X86_MSR_CORE_PERF_GLOBAL_CTRL));
  vmwrite(VMCS_HOST_IA32_SYSENTER_CS, rdmsr(X86_MSR_IA32_SYSENTER_CS));
  vmwrite(VMCS_HOST_IA32_SYSENTER_ESP, rdmsr(X86_MSR_IA32_SYSENTER_ESP));
  vmwrite(VMCS_HOST_IA32_SYSENTER_EIP, rdmsr(X86_MSR_IA32_SYSENTER_EIP));
}

#define COPY_VMCS_FIELD(GUEST_FIELD, HOST_FIELD)                               \
  vmwrite(GUEST_FIELD, vmread(HOST_FIELD))

#define SETUP_L2_SEGMENT(SEG, SELECTOR, BASE, LIMIT, AR)                       \
  vmwrite(VMCS_GUEST_##SEG##_SELECTOR, SELECTOR);                              \
  vmwrite(VMCS_GUEST_##SEG##_BASE, BASE);                                      \
  vmwrite(VMCS_GUEST_##SEG##_LIMIT, LIMIT);                                    \
  vmwrite(VMCS_GUEST_##SEG##_ACCESS_RIGHTS, AR);

GUEST_CODE static noinline void init_vmcs_guest_state(uint64_t cpu_id,
                                                      uint64_t vm_id)
{
  uint64_t l2_code_addr = X86_SYZOS_ADDR_VM_CODE(cpu_id, vm_id);
  uint64_t l2_stack_addr = X86_SYZOS_ADDR_VM_STACK(cpu_id, vm_id);
  SETUP_L2_SEGMENT(CS, vmread(VMCS_HOST_CS_SELECTOR), 0, 0xFFFFFFFF,
                   VMX_AR_64BIT_CODE);
  SETUP_L2_SEGMENT(DS, vmread(VMCS_HOST_DS_SELECTOR), 0, 0xFFFFFFFF,
                   VMX_AR_64BIT_DATA_STACK);
  SETUP_L2_SEGMENT(ES, vmread(VMCS_HOST_ES_SELECTOR), 0, 0xFFFFFFFF,
                   VMX_AR_64BIT_DATA_STACK);
  SETUP_L2_SEGMENT(SS, vmread(VMCS_HOST_SS_SELECTOR), 0, 0xFFFFFFFF,
                   VMX_AR_64BIT_DATA_STACK);
  SETUP_L2_SEGMENT(FS, vmread(VMCS_HOST_FS_SELECTOR), vmread(VMCS_HOST_FS_BASE),
                   0xFFFFFFFF, VMX_AR_64BIT_DATA_STACK);
  SETUP_L2_SEGMENT(GS, vmread(VMCS_HOST_GS_SELECTOR), vmread(VMCS_HOST_GS_BASE),
                   0xFFFFFFFF, VMX_AR_64BIT_DATA_STACK);
  SETUP_L2_SEGMENT(TR, vmread(VMCS_HOST_TR_SELECTOR), vmread(VMCS_HOST_TR_BASE),
                   0x67, VMX_AR_TSS_BUSY);
  SETUP_L2_SEGMENT(LDTR, 0, 0, 0, VMX_AR_LDTR_UNUSABLE);
  vmwrite(VMCS_GUEST_CR0, vmread(VMCS_HOST_CR0));
  vmwrite(VMCS_GUEST_CR3, vmread(VMCS_HOST_CR3));
  vmwrite(VMCS_GUEST_CR4, vmread(VMCS_HOST_CR4));
  vmwrite(VMCS_GUEST_RIP, l2_code_addr);
  vmwrite(VMCS_GUEST_RSP, l2_stack_addr + KVM_PAGE_SIZE - 8);
  vmwrite(VMCS_GUEST_RFLAGS, RFLAGS_1_BIT);
  vmwrite(VMCS_GUEST_DR7, 0x400);
  COPY_VMCS_FIELD(VMCS_GUEST_IA32_EFER, VMCS_HOST_IA32_EFER);
  COPY_VMCS_FIELD(VMCS_GUEST_IA32_PAT, VMCS_HOST_IA32_PAT);
  COPY_VMCS_FIELD(VMCS_GUEST_IA32_PERF_GLOBAL_CTRL,
                  VMCS_HOST_IA32_PERF_GLOBAL_CTRL);
  COPY_VMCS_FIELD(VMCS_GUEST_SYSENTER_CS, VMCS_HOST_IA32_SYSENTER_CS);
  COPY_VMCS_FIELD(VMCS_GUEST_SYSENTER_ESP, VMCS_HOST_IA32_SYSENTER_ESP);
  COPY_VMCS_FIELD(VMCS_GUEST_SYSENTER_EIP, VMCS_HOST_IA32_SYSENTER_EIP);
  vmwrite(VMCS_GUEST_IA32_DEBUGCTL, 0);
  vmwrite(VMCS_GUEST_GDTR_BASE, vmread(VMCS_HOST_GDTR_BASE));
  vmwrite(VMCS_GUEST_GDTR_LIMIT, 0xffff);
  vmwrite(VMCS_GUEST_IDTR_BASE, vmread(VMCS_HOST_IDTR_BASE));
  vmwrite(VMCS_GUEST_IDTR_LIMIT, 0xffff);
  vmwrite(VMCS_LINK_POINTER, 0xffffffffffffffff);
  vmwrite(VMCS_GUEST_ACTIVITY_STATE, 0);
  vmwrite(VMCS_GUEST_INTERRUPTIBILITY_INFO, 0);
  vmwrite(VMCS_GUEST_PENDING_DBG_EXCEPTIONS, 0);
  vmwrite(VMCS_VMX_PREEMPTION_TIMER_VALUE, 0);
  vmwrite(VMCS_GUEST_INTR_STATUS, 0);
  vmwrite(VMCS_GUEST_PML_INDEX, 0);
}

GUEST_CODE static noinline void nested_create_vm_intel(struct api_call_1* cmd,
                                                       uint64_t cpu_id)
{
  uint64_t vm_id = cmd->arg;
  uint64_t vmcs_addr = X86_SYZOS_ADDR_VMCS_VMCB(cpu_id, vm_id);
  uint8_t error = 0;
  uint64_t l2_pml4_addr = X86_SYZOS_ADDR_VM_PGTABLE(cpu_id, vm_id);
  uint64_t l2_msr_bitmap = X86_SYZOS_ADDR_MSR_BITMAP(cpu_id, vm_id);
  *(uint32_t*)vmcs_addr = rdmsr(X86_MSR_IA32_VMX_BASIC);
  asm volatile("vmclear %1; setna %0"
               : "=q"(error)
               : "m"(vmcs_addr)
               : "memory", "cc");
  if (error) {
    guest_uexit(0xE2BAD1);
    return;
  }
  nested_vmptrld(cpu_id, vm_id);
  guest_memset((void*)l2_pml4_addr, 0, KVM_PAGE_SIZE);
  guest_memset((void*)l2_msr_bitmap, 0, KVM_PAGE_SIZE);
  setup_l2_page_tables(CPU_VENDOR_INTEL, cpu_id, vm_id, 0);
  init_vmcs_control_fields(cpu_id, vm_id);
  init_vmcs_host_state();
  init_vmcs_guest_state(cpu_id, vm_id);
}
#define SETUP_L2_SEGMENT_SVM(VMCB_PTR, SEG_NAME, SELECTOR, BASE, LIMIT, ATTR)  \
  vmcb_write16(VMCB_PTR, VMCB_GUEST_##SEG_NAME##_SEL, SELECTOR);               \
  vmcb_write16(VMCB_PTR, VMCB_GUEST_##SEG_NAME##_ATTR, ATTR);                  \
  vmcb_write32(VMCB_PTR, VMCB_GUEST_##SEG_NAME##_LIM, LIMIT);                  \
  vmcb_write64(VMCB_PTR, VMCB_GUEST_##SEG_NAME##_BASE, BASE);

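// SVM counterpart of the VMCS guest setup: fills the VMCB guest state with
// flat 64-bit segments, L1's control registers and descriptor tables, RIP/RSP
// at the per-VM code/stack pages, intercepts everything in intercept vectors
// 3 and 4, and enables nested paging with nCR3 pointing at the per-VM page
// table (ASID 1).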
GUEST_CODE static noinline void init_vmcb_guest_state(uint64_t cpu_id,
                                                      uint64_t vm_id)
{
  uint64_t vmcb_addr = X86_SYZOS_ADDR_VMCS_VMCB(cpu_id, vm_id);
  uint64_t l2_code_addr = X86_SYZOS_ADDR_VM_CODE(cpu_id, vm_id);
  uint64_t l2_stack_addr = X86_SYZOS_ADDR_VM_STACK(cpu_id, vm_id);
  uint64_t npt_pml4_addr = X86_SYZOS_ADDR_VM_PGTABLE(cpu_id, vm_id);
  SETUP_L2_SEGMENT_SVM(vmcb_addr, CS, X86_SYZOS_SEL_CODE, 0, 0xFFFFFFFF,
                       SVM_ATTR_64BIT_CODE);
  SETUP_L2_SEGMENT_SVM(vmcb_addr, DS, X86_SYZOS_SEL_DATA, 0, 0xFFFFFFFF,
                       SVM_ATTR_64BIT_DATA);
  SETUP_L2_SEGMENT_SVM(vmcb_addr, ES, X86_SYZOS_SEL_DATA, 0, 0xFFFFFFFF,
                       SVM_ATTR_64BIT_DATA);
  SETUP_L2_SEGMENT_SVM(vmcb_addr, SS, X86_SYZOS_SEL_DATA, 0, 0xFFFFFFFF,
                       SVM_ATTR_64BIT_DATA);
  SETUP_L2_SEGMENT_SVM(vmcb_addr, FS, X86_SYZOS_SEL_DATA, 0, 0xFFFFFFFF,
                       SVM_ATTR_64BIT_DATA);
  SETUP_L2_SEGMENT_SVM(vmcb_addr, GS, X86_SYZOS_SEL_DATA, 0, 0xFFFFFFFF,
                       SVM_ATTR_64BIT_DATA);
  SETUP_L2_SEGMENT_SVM(vmcb_addr, TR, X86_SYZOS_SEL_TSS64,
                       X86_SYZOS_ADDR_VAR_TSS, 0x67, SVM_ATTR_TSS_BUSY);
  SETUP_L2_SEGMENT_SVM(vmcb_addr, LDTR, 0, 0, 0, SVM_ATTR_LDTR_UNUSABLE);
  vmcb_write64(vmcb_addr, VMCB_GUEST_CR0, read_cr0() | X86_CR0_WP);
  vmcb_write64(vmcb_addr, VMCB_GUEST_CR3, read_cr3());
  vmcb_write64(vmcb_addr, VMCB_GUEST_CR4, read_cr4());
  vmcb_write64(vmcb_addr, VMCB_GUEST_RIP, l2_code_addr);
  vmcb_write64(vmcb_addr, VMCB_GUEST_RSP, l2_stack_addr + KVM_PAGE_SIZE - 8);
  vmcb_write64(vmcb_addr, VMCB_GUEST_RFLAGS, RFLAGS_1_BIT);
  vmcb_write64(vmcb_addr, VMCB_GUEST_EFER,
               X86_EFER_LME | X86_EFER_LMA | X86_EFER_SVME);
  vmcb_write64(vmcb_addr, VMCB_RAX, 0);
  struct {
    uint16_t limit;
    uint64_t base;
  } __attribute__((packed)) gdtr, idtr;
  asm volatile("sgdt %0" : "=m"(gdtr));
  asm volatile("sidt %0" : "=m"(idtr));
  vmcb_write64(vmcb_addr, VMCB_GUEST_GDTR_BASE, gdtr.base);
  vmcb_write32(vmcb_addr, VMCB_GUEST_GDTR_LIM, gdtr.limit);
  vmcb_write64(vmcb_addr, VMCB_GUEST_IDTR_BASE, idtr.base);
  vmcb_write32(vmcb_addr, VMCB_GUEST_IDTR_LIM, idtr.limit);
  vmcb_write32(vmcb_addr, VMCB_CTRL_INTERCEPT_VEC3,
               VMCB_CTRL_INTERCEPT_VEC3_ALL);
  vmcb_write32(vmcb_addr, VMCB_CTRL_INTERCEPT_VEC4,
               VMCB_CTRL_INTERCEPT_VEC4_ALL);
  vmcb_write64(vmcb_addr, VMCB_CTRL_NP_ENABLE, (1 << VMCB_CTRL_NPT_ENABLE_BIT));
  uint64_t npt_pointer = (npt_pml4_addr & ~0xFFF);
  vmcb_write64(vmcb_addr, VMCB_CTRL_N_CR3, npt_pointer);
  vmcb_write32(vmcb_addr, VMCB_CTRL_ASID, 1);
}

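// AMD path of the "create nested VM" API call: zero the VMCB, the per-vCPU
// arch-specific (host-save) page, the NPT root and the MSR bitmap, build the
// L2 page tables and fill in the VMCB.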
GUEST_CODE static noinline void nested_create_vm_amd(struct api_call_1* cmd,
                                                     uint64_t cpu_id)
{
  uint64_t vm_id = cmd->arg;
  uint64_t vmcb_addr = X86_SYZOS_ADDR_VMCS_VMCB(cpu_id, vm_id);
  uint64_t l2_pml4_addr = X86_SYZOS_ADDR_VM_PGTABLE(cpu_id, vm_id);
  uint64_t l2_msr_bitmap = X86_SYZOS_ADDR_MSR_BITMAP(cpu_id, vm_id);
  guest_memset((void*)vmcb_addr, 0, KVM_PAGE_SIZE);
  guest_memset((void*)X86_SYZOS_ADDR_VM_ARCH_SPECIFIC(cpu_id), 0,
               KVM_PAGE_SIZE);
  guest_memset((void*)l2_pml4_addr, 0, KVM_PAGE_SIZE);
  guest_memset((void*)l2_msr_bitmap, 0, KVM_PAGE_SIZE);
  setup_l2_page_tables(CPU_VENDOR_AMD, cpu_id, vm_id, 0);
  init_vmcb_guest_state(cpu_id, vm_id);
}

GUEST_CODE static noinline void
guest_handle_nested_create_vm(struct api_call_1* cmd, uint64_t cpu_id)
{
  if (get_cpu_vendor() == CPU_VENDOR_INTEL) {
    nested_create_vm_intel(cmd, cpu_id);
  } else {
    nested_create_vm_amd(cmd, cpu_id);
  }
}

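// Software walk of the four-level L2 page tables built by
// setup_l2_page_tables(): translates an L2 guest-physical address to the
// L1 address backing it, or returns 0 if any level is not present.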
GUEST_CODE static uint64_t l2_gpa_to_pa(uint64_t cpu_id, uint64_t vm_id,
                                        uint64_t gpa)
{
  uint64_t pml4_addr = X86_SYZOS_ADDR_VM_PGTABLE(cpu_id, vm_id);
  volatile uint64_t* pml4 = (volatile uint64_t*)pml4_addr;
  uint64_t pml4_idx = (gpa >> 39) & 0x1FF;
  if (!(pml4[pml4_idx] & X86_PDE64_PRESENT))
    return 0;
  volatile uint64_t* pdpt = (volatile uint64_t*)(pml4[pml4_idx] & ~0xFFF);
  uint64_t pdpt_idx = (gpa >> 30) & 0x1FF;
  if (!(pdpt[pdpt_idx] & X86_PDE64_PRESENT))
    return 0;
  volatile uint64_t* pd = (volatile uint64_t*)(pdpt[pdpt_idx] & ~0xFFF);
  uint64_t pd_idx = (gpa >> 21) & 0x1FF;
  if (!(pd[pd_idx] & X86_PDE64_PRESENT))
    return 0;
  volatile uint64_t* pt = (volatile uint64_t*)(pd[pd_idx] & ~0xFFF);
  uint64_t pt_idx = (gpa >> 12) & 0x1FF;
  if (!(pt[pt_idx] & X86_PDE64_PRESENT))
    return 0;
  return (pt[pt_idx] & ~0xFFF) + (gpa & 0xFFF);
}

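// Loads raw L2 payload code: copies at most one page of instructions into
// the memory backing X86_SYZOS_ADDR_USER_CODE in the L2 address space and
// repoints the saved L2 RIP at it (with RSP at the top of the stack page).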
GUEST_CODE static noinline void
guest_handle_nested_load_code(struct api_call_nested_load_code* cmd,
                              uint64_t cpu_id)
{
  uint64_t vm_id = cmd->vm_id;
  uint64_t l2_code_backing =
      l2_gpa_to_pa(cpu_id, vm_id, X86_SYZOS_ADDR_USER_CODE);
  if (!l2_code_backing) {
    guest_uexit(0xE2BAD4);
    return;
  }
  uint64_t l2_code_size =
      cmd->header.size - sizeof(struct api_call_header) - sizeof(uint64_t);
  if (l2_code_size > KVM_PAGE_SIZE)
    l2_code_size = KVM_PAGE_SIZE;
  guest_memcpy((void*)l2_code_backing, (void*)cmd->insns, l2_code_size);
  if (get_cpu_vendor() == CPU_VENDOR_INTEL) {
    nested_vmptrld(cpu_id, vm_id);
    vmwrite(VMCS_GUEST_RIP, X86_SYZOS_ADDR_USER_CODE);
    vmwrite(VMCS_GUEST_RSP, X86_SYZOS_ADDR_STACK_BOTTOM + KVM_PAGE_SIZE - 8);
  } else {
    vmcb_write64(X86_SYZOS_ADDR_VMCS_VMCB(cpu_id, vm_id), VMCB_GUEST_RIP,
                 X86_SYZOS_ADDR_USER_CODE);
    vmcb_write64(X86_SYZOS_ADDR_VMCS_VMCB(cpu_id, vm_id), VMCB_GUEST_RSP,
                 X86_SYZOS_ADDR_STACK_BOTTOM + KVM_PAGE_SIZE - 8);
  }
}

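// Loads a full SYZOS program as the L2 payload: copies it into the per-VM
// code page, records its size in the L2 view of the globals page, seeds the
// initial L2 register context (RDI = vCPU id), and sets the L2 entry point
// to guest_main().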
GUEST_CODE static noinline void
guest_handle_nested_load_syzos(struct api_call_nested_load_syzos* cmd,
                               uint64_t cpu_id)
{
  uint64_t vm_id = cmd->vm_id;
  uint64_t prog_size =
      cmd->header.size -
      __builtin_offsetof(struct api_call_nested_load_syzos, program);
  uint64_t l2_code_backing = X86_SYZOS_ADDR_VM_CODE(cpu_id, vm_id);
  volatile struct syzos_globals* globals =
      (volatile struct syzos_globals*)X86_SYZOS_ADDR_GLOBALS;
  if (prog_size > KVM_PAGE_SIZE)
    prog_size = KVM_PAGE_SIZE;
  guest_memcpy((void*)l2_code_backing, (void*)cmd->program, prog_size);
  uint64_t globals_pa = l2_gpa_to_pa(cpu_id, vm_id, X86_SYZOS_ADDR_GLOBALS);
  if (!globals_pa) {
    guest_uexit(0xE2BAD3);
    return;
  }
  volatile struct syzos_globals* l2_globals =
      (volatile struct syzos_globals*)globals_pa;
  for (int i = 0; i < KVM_MAX_VCPU; i++) {
    l2_globals->text_sizes[i] = prog_size;
    globals->l2_ctx[i][vm_id].rdi = i;
    globals->l2_ctx[i][vm_id].rax = 0;
  }
  uint64_t entry_rip = executor_fn_guest_addr(guest_main);
  if (get_cpu_vendor() == CPU_VENDOR_INTEL) {
    nested_vmptrld(cpu_id, vm_id);
    vmwrite(VMCS_GUEST_RIP, entry_rip);
    vmwrite(VMCS_GUEST_RSP, X86_SYZOS_ADDR_STACK_BOTTOM + KVM_PAGE_SIZE - 8);
  } else {
    uint64_t vmcb = X86_SYZOS_ADDR_VMCS_VMCB(cpu_id, vm_id);
    vmcb_write64(vmcb, VMCB_GUEST_RIP, entry_rip);
    vmcb_write64(vmcb, VMCB_GUEST_RSP,
                 X86_SYZOS_ADDR_STACK_BOTTOM + KVM_PAGE_SIZE - 8);
  }
}

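// Hand-rolled VMLAUNCH/VMRESUME trampoline: the launch flag and callee-saved
// registers are pushed, VMCS_HOST_RSP is written with the resulting stack
// pointer, the L2 GPRs are loaded from l2_ctx, and the VM entry is issued.
// A failed entry falls through, unwinds the stack and reports
// VMCS_VM_INSTRUCTION_ERROR via guest_uexit(); on a VM exit, execution
// presumably resumes at after_vmentry_label (installed as VMCS_HOST_RIP
// elsewhere) with fail_flag cleared.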
GUEST_CODE static noinline void
guest_handle_nested_vmentry_intel(uint64_t vm_id, uint64_t cpu_id,
                                  bool is_launch)
{
  volatile struct syzos_globals* globals =
      (volatile struct syzos_globals*)X86_SYZOS_ADDR_GLOBALS;
  struct l2_guest_regs* l2_regs =
      (struct l2_guest_regs*)&globals->l2_ctx[cpu_id][vm_id];
  uint64_t vmx_error_code = 0;
  uint64_t fail_flag = 0;
  nested_vmptrld(cpu_id, vm_id);
  globals->active_vm_id[cpu_id] = vm_id;
  asm volatile(
      R"(
		sub $128, %%rsp
		push %[cpu_id]
		push %[launch]
		push %%rbx
		push %%rbp
		push %%r12
		push %%r13
		push %%r14
		push %%r15
		mov %[host_rsp_field], %%r10
		mov %%rsp, %%r11
		vmwrite %%r11, %%r10
		mov %[l2_regs], %%rax
		mov 8(%%rax), %%rbx
		mov 16(%%rax), %%rcx
		mov 24(%%rax), %%rdx
		mov 32(%%rax), %%rsi
		mov 40(%%rax), %%rdi
		mov 48(%%rax), %%rbp
		mov 56(%%rax), %%r8
		mov 64(%%rax), %%r9
		mov 72(%%rax), %%r10
		mov 80(%%rax), %%r11
		mov 88(%%rax), %%r12
		mov 96(%%rax), %%r13
		mov 104(%%rax), %%r14
		mov 112(%%rax), %%r15
		mov 0(%%rax), %%rax
		cmpq $0, 48(%%rsp)
		je 1f
		vmlaunch
		jmp 2f
	1:	vmresume
	2:
		pop %%r15
		pop %%r14
		pop %%r13
		pop %%r12
		pop %%rbp
		pop %%rbx
		add $16, %%rsp
		add $128, %%rsp
		mov $1, %[ret]
		jmp 3f
		.globl after_vmentry_label
	after_vmentry_label:
		xor %[ret], %[ret]
	3:
	)"
      : [ret] "=&r"(fail_flag)
      : [launch] "r"((uint64_t)is_launch), [host_rsp_field] "i"(VMCS_HOST_RSP),
        [cpu_id] "r"(cpu_id), [l2_regs] "r"(l2_regs)
      : "cc", "memory", "rax", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10",
        "r11");
  if (fail_flag) {
    vmx_error_code = vmread(VMCS_VM_INSTRUCTION_ERROR);
    guest_uexit(0xE2E10000 | (uint32_t)vmx_error_code);
    return;
  }
}

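// SVM counterpart of the Intel trampoline: loads the L2 GPRs from l2_ctx
// (RAX via the VMCB save area), masks GIF with CLGI and issues VMRUN, then
// pushes the post-exit register file and hands it to
// nested_vm_exit_handler_amd before unwinding the stack and re-enabling GIF
// with STGI. The carry flag after VMRUN is treated as a failure indicator.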
GUEST_CODE static noinline void guest_run_amd_vm(uint64_t cpu_id,
                                                 uint64_t vm_id)
{
  uint64_t vmcb_addr = X86_SYZOS_ADDR_VMCS_VMCB(cpu_id, vm_id);
  volatile struct syzos_globals* globals =
      (volatile struct syzos_globals*)X86_SYZOS_ADDR_GLOBALS;
  globals->active_vm_id[cpu_id] = vm_id;
  struct l2_guest_regs* l2_regs =
      (struct l2_guest_regs*)&globals->l2_ctx[cpu_id][vm_id];
  uint8_t fail_flag = 0;
  asm volatile(
      R"(
		sub $128, %%rsp
		push %[cpu_id]
		push %[vmcb_addr]
		push %%rbx
		push %%rbp
		push %%r12
		push %%r13
		push %%r14
		push %%r15
		mov %[l2_regs], %%rax
		mov 0(%%rax), %%rbx
		mov %[vmcb_addr], %%rcx
		mov %%rbx, 0x5f8(%%rcx)
		mov 8(%%rax), %%rbx
		mov 16(%%rax), %%rcx
		mov 24(%%rax), %%rdx
		mov 32(%%rax), %%rsi
		mov 40(%%rax), %%rdi
		mov 48(%%rax), %%rbp
		mov 56(%%rax), %%r8
		mov 64(%%rax), %%r9
		mov 72(%%rax), %%r10
		mov 80(%%rax), %%r11
		mov 88(%%rax), %%r12
		mov 96(%%rax), %%r13
		mov 104(%%rax), %%r14
		mov 112(%%rax), %%r15
		clgi
		mov 48(%%rsp), %%rax
		vmrun
	1:
		mov 48(%%rsp), %%rax
		setc %[fail_flag]
		pushq 0x70(%%rax)
		push %%r15
		push %%r14
		push %%r13
		push %%r12
		push %%r11
		push %%r10
		push %%r9
		push %%r8
		push %%rbp
		push %%rdi
		push %%rsi
		push %%rdx
		push %%rcx
		push %%rbx
		mov 176(%%rsp), %%rax
		pushq 0x5f8(%%rax)
		mov 120(%%rsp), %%rdi
		mov %%rsp, %%rsi
		call nested_vm_exit_handler_amd
		add $128, %%rsp
		pop %%r15
		pop %%r14
		pop %%r13
		pop %%r12
		pop %%rbp
		pop %%rbx
		add $16, %%rsp
		add $128, %%rsp
		stgi
		after_vmentry_label_amd:
	)"
      : [fail_flag] "=m"(fail_flag)
      : [cpu_id] "r"(cpu_id), [vmcb_addr] "r"(vmcb_addr),
        [l2_regs] "r"(l2_regs), [l2_regs_size] "i"(sizeof(struct l2_guest_regs))
      : "cc", "memory", "rax", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10",
        "r11");
  if (fail_flag) {
    guest_uexit(0xE2E10000 | 0xFFFF);
    return;
  }
}

GUEST_CODE static noinline void
guest_handle_nested_vmlaunch(struct api_call_1* cmd, uint64_t cpu_id)
{
  uint64_t vm_id = cmd->arg;
  if (get_cpu_vendor() == CPU_VENDOR_INTEL) {
    guest_handle_nested_vmentry_intel(vm_id, cpu_id, true);
  } else {
    guest_run_amd_vm(cpu_id, vm_id);
  }
}

GUEST_CODE static noinline void
guest_handle_nested_vmresume(struct api_call_1* cmd, uint64_t cpu_id)
{
  uint64_t vm_id = cmd->arg;
  if (get_cpu_vendor() == CPU_VENDOR_INTEL) {
    guest_handle_nested_vmentry_intel(vm_id, cpu_id, false);
  } else {
    guest_run_amd_vm(cpu_id, vm_id);
  }
}

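// The handlers below let the fuzzer poke the active VMCS/VMCB directly:
// masked read-modify-write of arbitrary fields, SVM event injection,
// intercept-bit toggling, INVLPGA, STGI/CLGI, and VMLOAD/VMSAVE.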
GUEST_CODE static noinline void
guest_handle_nested_intel_vmwrite_mask(struct api_call_5* cmd, uint64_t cpu_id)
{
  if (get_cpu_vendor() != CPU_VENDOR_INTEL)
    return;
  uint64_t vm_id = cmd->args[0];
  nested_vmptrld(cpu_id, vm_id);
  uint64_t field = cmd->args[1];
  uint64_t set_mask = cmd->args[2];
  uint64_t unset_mask = cmd->args[3];
  uint64_t flip_mask = cmd->args[4];
  uint64_t current_value = vmread(field);
  uint64_t new_value = (current_value & ~unset_mask) | set_mask;
  new_value ^= flip_mask;
  vmwrite(field, new_value);
}

GUEST_CODE static noinline void
guest_handle_nested_amd_vmcb_write_mask(struct api_call_5* cmd, uint64_t cpu_id)
{
  if (get_cpu_vendor() != CPU_VENDOR_AMD)
    return;
  uint64_t vm_id = cmd->args[0];
  uint64_t vmcb_addr = X86_SYZOS_ADDR_VMCS_VMCB(cpu_id, vm_id);
  uint64_t offset = cmd->args[1];
  uint64_t set_mask = cmd->args[2];
  uint64_t unset_mask = cmd->args[3];
  uint64_t flip_mask = cmd->args[4];
  uint64_t current_value = vmcb_read64((volatile uint8_t*)vmcb_addr, offset);
  uint64_t new_value = (current_value & ~unset_mask) | set_mask;
  new_value ^= flip_mask;
  vmcb_write64(vmcb_addr, offset, new_value);
}

GUEST_CODE static noinline void
guest_handle_nested_amd_invlpga(struct api_call_2* cmd, uint64_t cpu_id)
{
  if (get_cpu_vendor() != CPU_VENDOR_AMD)
    return;
  uint64_t linear_addr = cmd->args[0];
  uint32_t asid = (uint32_t)cmd->args[1];
  asm volatile("invlpga" : : "a"(linear_addr), "c"(asid) : "memory");
}

GUEST_CODE static noinline void guest_handle_nested_amd_stgi()
{
  if (get_cpu_vendor() != CPU_VENDOR_AMD)
    return;
  asm volatile("stgi" ::: "memory");
}

GUEST_CODE static noinline void guest_handle_nested_amd_clgi()
{
  if (get_cpu_vendor() != CPU_VENDOR_AMD)
    return;
  asm volatile("clgi" ::: "memory");
}

GUEST_CODE static noinline void
guest_handle_nested_amd_inject_event(struct api_call_5* cmd, uint64_t cpu_id)
{
  if (get_cpu_vendor() != CPU_VENDOR_AMD)
    return;
  uint64_t vm_id = cmd->args[0];
  uint64_t vmcb_addr = X86_SYZOS_ADDR_VMCS_VMCB(cpu_id, vm_id);
  uint64_t vector = cmd->args[1] & 0xFF;
  uint64_t type = cmd->args[2] & 0x7;
  uint64_t error_code = cmd->args[3] & 0xFFFFFFFF;
  uint64_t flags = cmd->args[4];
  uint64_t event_inj = vector;
  event_inj |= (type << 8);
  if (flags & 2)
    event_inj |= (1ULL << 11);
  if (flags & 1)
    event_inj |= (1ULL << 31);
  event_inj |= (error_code << 32);
  vmcb_write64(vmcb_addr, 0x60, event_inj);
}

GUEST_CODE static noinline void
guest_handle_nested_amd_set_intercept(struct api_call_5* cmd, uint64_t cpu_id)
{
  if (get_cpu_vendor() != CPU_VENDOR_AMD)
    return;
  uint64_t vm_id = cmd->args[0];
  uint64_t vmcb_addr = X86_SYZOS_ADDR_VMCS_VMCB(cpu_id, vm_id);
  uint64_t offset = cmd->args[1];
  uint64_t bit_mask = cmd->args[2];
  uint64_t action = cmd->args[3];
  uint32_t current = vmcb_read32(vmcb_addr, (uint16_t)offset);
  if (action == 1)
    current |= (uint32_t)bit_mask;
  else
    current &= ~((uint32_t)bit_mask);
  vmcb_write32(vmcb_addr, (uint16_t)offset, current);
}

GUEST_CODE static noinline void
guest_handle_nested_amd_vmload(struct api_call_1* cmd, uint64_t cpu_id)
{
  if (get_cpu_vendor() != CPU_VENDOR_AMD)
    return;
  uint64_t vm_id = cmd->arg;
  uint64_t vmcb_pa = X86_SYZOS_ADDR_VMCS_VMCB(cpu_id, vm_id);
  asm volatile("vmload %%rax" ::"a"(vmcb_pa) : "memory");
}

GUEST_CODE static noinline void
guest_handle_nested_amd_vmsave(struct api_call_1* cmd, uint64_t cpu_id)
{
  if (get_cpu_vendor() != CPU_VENDOR_AMD)
    return;
  uint64_t vm_id = cmd->arg;
  uint64_t vmcb_pa = X86_SYZOS_ADDR_VMCS_VMCB(cpu_id, vm_id);
  asm volatile("vmsave %%rax" ::"a"(vmcb_pa) : "memory");
}

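// Pre-assembled guest payload blobs (16-bit CPL3, 32-bit paged/vm86, 64-bit
// long-mode enable, VMX init, VM-exit, 64-bit CPL3) carried over from the
// syzkaller executor; presumably used by other, non-SYZOS KVM pseudo-syscalls
// that this particular program does not exercise.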
const char kvm_asm16_cpl3[] =
    "\x0f\x20\xc0\x66\x83\xc8\x01\x0f\x22\xc0\xb8\xa0\x00\x0f\x00\xd8\xb8\x2b"
    "\x00\x8e\xd8\x8e\xc0\x8e\xe0\x8e\xe8\xbc\x00\x01\xc7\x06\x00\x01\x1d\xba"
    "\xc7\x06\x02\x01\x23\x00\xc7\x06\x04\x01\x00\x01\xc7\x06\x06\x01\x2b\x00"
    "\xcb";
const char kvm_asm32_paged[] = "\x0f\x20\xc0\x0d\x00\x00\x00\x80\x0f\x22\xc0";
const char kvm_asm32_vm86[] =
    "\x66\xb8\xb8\x00\x0f\x00\xd8\xea\x00\x00\x00\x00\xd0\x00";
const char kvm_asm32_paged_vm86[] =
    "\x0f\x20\xc0\x0d\x00\x00\x00\x80\x0f\x22\xc0\x66\xb8\xb8\x00\x0f\x00\xd8"
    "\xea\x00\x00\x00\x00\xd0\x00";
const char kvm_asm64_enable_long[] =
    "\x0f\x20\xc0\x0d\x00\x00\x00\x80\x0f\x22\xc0\xea\xde\xc0\xad\x0b\x50\x00"
    "\x48\xc7\xc0\xd8\x00\x00\x00\x0f\x00\xd8";
const char kvm_asm64_init_vm[] =
    "\x0f\x20\xc0\x0d\x00\x00\x00\x80\x0f\x22\xc0\xea\xde\xc0\xad\x0b\x50\x00"
    "\x48\xc7\xc0\xd8\x00\x00\x00\x0f\x00\xd8\x48\xc7\xc1\x3a\x00\x00\x00\x0f"
    "\x32\x48\x83\xc8\x05\x0f\x30\x0f\x20\xe0\x48\x0d\x00\x20\x00\x00\x0f\x22"
    "\xe0\x48\xc7\xc1\x80\x04\x00\x00\x0f\x32\x48\xc7\xc2\x00\x60\x00\x00\x89"
    "\x02\x48\xc7\xc2\x00\x70\x00\x00\x89\x02\x48\xc7\xc0\x00\x5f\x00\x00\xf3"
    "\x0f\xc7\x30\x48\xc7\xc0\x08\x5f\x00\x00\x66\x0f\xc7\x30\x0f\xc7\x30\x48"
    "\xc7\xc1\x81\x04\x00\x00\x0f\x32\x48\x83\xc8\x00\x48\x21\xd0\x48\xc7\xc2"
    "\x00\x40\x00\x00\x0f\x79\xd0\x48\xc7\xc1\x82\x04\x00\x00\x0f\x32\x48\x83"
    "\xc8\x00\x48\x21\xd0\x48\xc7\xc2\x02\x40\x00\x00\x0f\x79\xd0\x48\xc7\xc2"
    "\x1e\x40\x00\x00\x48\xc7\xc0\x81\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc1\x83"
    "\x04\x00\x00\x0f\x32\x48\x0d\xff\x6f\x03\x00\x48\x21\xd0\x48\xc7\xc2\x0c"
    "\x40\x00\x00\x0f\x79\xd0\x48\xc7\xc1\x84\x04\x00\x00\x0f\x32\x48\x0d\xff"
    "\x17\x00\x00\x48\x21\xd0\x48\xc7\xc2\x12\x40\x00\x00\x0f\x79\xd0\x48\xc7"
    "\xc2\x04\x2c\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2"
    "\x00\x28\x00\x00\x48\xc7\xc0\xff\xff\xff\xff\x0f\x79\xd0\x48\xc7\xc2\x02"
    "\x0c\x00\x00\x48\xc7\xc0\x50\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc0\x58\x00"
    "\x00\x00\x48\xc7\xc2\x00\x0c\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x04\x0c\x00"
    "\x00\x0f\x79\xd0\x48\xc7\xc2\x06\x0c\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x08"
    "\x0c\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x0a\x0c\x00\x00\x0f\x79\xd0\x48\xc7"
    "\xc0\xd8\x00\x00\x00\x48\xc7\xc2\x0c\x0c\x00\x00\x0f\x79\xd0\x48\xc7\xc2"
    "\x02\x2c\x00\x00\x48\xc7\xc0\x00\x05\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x00"
    "\x4c\x00\x00\x48\xc7\xc0\x50\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x10\x6c"
    "\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x12\x6c\x00"
    "\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x0f\x20\xc0\x48\xc7\xc2\x00"
    "\x6c\x00\x00\x48\x89\xc0\x0f\x79\xd0\x0f\x20\xd8\x48\xc7\xc2\x02\x6c\x00"
    "\x00\x48\x89\xc0\x0f\x79\xd0\x0f\x20\xe0\x48\xc7\xc2\x04\x6c\x00\x00\x48"
    "\x89\xc0\x0f\x79\xd0\x48\xc7\xc2\x06\x6c\x00\x00\x48\xc7\xc0\x00\x00\x00"
    "\x00\x0f\x79\xd0\x48\xc7\xc2\x08\x6c\x00\x00\x48\xc7\xc0\x00\x00\x00\x00"
    "\x0f\x79\xd0\x48\xc7\xc2\x0a\x6c\x00\x00\x48\xc7\xc0\x00\x3a\x00\x00\x0f"
    "\x79\xd0\x48\xc7\xc2\x0c\x6c\x00\x00\x48\xc7\xc0\x00\x10\x00\x00\x0f\x79"
    "\xd0\x48\xc7\xc2\x0e\x6c\x00\x00\x48\xc7\xc0\x00\x38\x00\x00\x0f\x79\xd0"
    "\x48\xc7\xc2\x14\x6c\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48"
    "\xc7\xc2\x16\x6c\x00\x00\x48\x8b\x04\x25\x10\x5f\x00\x00\x0f\x79\xd0\x48"
    "\xc7\xc2\x00\x00\x00\x00\x48\xc7\xc0\x01\x00\x00\x00\x0f\x79\xd0\x48\xc7"
    "\xc2\x02\x00\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2"
    "\x00\x20\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x02"
    "\x20\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x04\x20"
    "\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x06\x20\x00"
    "\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc1\x77\x02\x00\x00"
    "\x0f\x32\x48\xc1\xe2\x20\x48\x09\xd0\x48\xc7\xc2\x00\x2c\x00\x00\x48\x89"
    "\xc0\x0f\x79\xd0\x48\xc7\xc2\x04\x40\x00\x00\x48\xc7\xc0\x00\x00\x00\x00"
    "\x0f\x79\xd0\x48\xc7\xc2\x0a\x40\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f"
    "\x79\xd0\x48\xc7\xc2\x0e\x40\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79"
    "\xd0\x48\xc7\xc2\x10\x40\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0"
    "\x48\xc7\xc2\x16\x40\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48"
    "\xc7\xc2\x14\x40\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7"
    "\xc2\x00\x60\x00\x00\x48\xc7\xc0\xff\xff\xff\xff\x0f\x79\xd0\x48\xc7\xc2"
    "\x02\x60\x00\x00\x48\xc7\xc0\xff\xff\xff\xff\x0f\x79\xd0\x48\xc7\xc2\x1c"
    "\x20\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x1e\x20"
    "\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x20\x20\x00"
    "\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x22\x20\x00\x00"
    "\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x00\x08\x00\x00\x48"
    "\xc7\xc0\x58\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x02\x08\x00\x00\x48\xc7"
    "\xc0\x50\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x04\x08\x00\x00\x48\xc7\xc0"
    "\x58\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x06\x08\x00\x00\x48\xc7\xc0\x58"
    "\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x08\x08\x00\x00\x48\xc7\xc0\x58\x00"
    "\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x0a\x08\x00\x00\x48\xc7\xc0\x58\x00\x00"
    "\x00\x0f\x79\xd0\x48\xc7\xc2\x0c\x08\x00\x00\x48\xc7\xc0\x00\x00\x00\x00"
    "\x0f\x79\xd0\x48\xc7\xc2\x0e\x08\x00\x00\x48\xc7\xc0\xd8\x00\x00\x00\x0f"
    "\x79\xd0\x48\xc7\xc2\x12\x68\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79"
    "\xd0\x48\xc7\xc2\x14\x68\x00\x00\x48\xc7\xc0\x00\x3a\x00\x00\x0f\x79\xd0"
    "\x48\xc7\xc2\x16\x68\x00\x00\x48\xc7\xc0\x00\x10\x00\x00\x0f\x79\xd0\x48"
    "\xc7\xc2\x18\x68\x00\x00\x48\xc7\xc0\x00\x38\x00\x00\x0f\x79\xd0\x48\xc7"
    "\xc2\x00\x48\x00\x00\x48\xc7\xc0\xff\xff\x0f\x00\x0f\x79\xd0\x48\xc7\xc2"
    "\x02\x48\x00\x00\x48\xc7\xc0\xff\xff\x0f\x00\x0f\x79\xd0\x48\xc7\xc2\x04"
    "\x48\x00\x00\x48\xc7\xc0\xff\xff\x0f\x00\x0f\x79\xd0\x48\xc7\xc2\x06\x48"
    "\x00\x00\x48\xc7\xc0\xff\xff\x0f\x00\x0f\x79\xd0\x48\xc7\xc2\x08\x48\x00"
    "\x00\x48\xc7\xc0\xff\xff\x0f\x00\x0f\x79\xd0\x48\xc7\xc2\x0a\x48\x00\x00"
    "\x48\xc7\xc0\xff\xff\x0f\x00\x0f\x79\xd0\x48\xc7\xc2\x0c\x48\x00\x00\x48"
    "\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x0e\x48\x00\x00\x48\xc7"
    "\xc0\xff\x1f\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x10\x48\x00\x00\x48\xc7\xc0"
    "\xff\x1f\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x12\x48\x00\x00\x48\xc7\xc0\xff"
    "\x1f\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x14\x48\x00\x00\x48\xc7\xc0\x93\x40"
    "\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x16\x48\x00\x00\x48\xc7\xc0\x9b\x20\x00"
    "\x00\x0f\x79\xd0\x48\xc7\xc2\x18\x48\x00\x00\x48\xc7\xc0\x93\x40\x00\x00"
    "\x0f\x79\xd0\x48\xc7\xc2\x1a\x48\x00\x00\x48\xc7\xc0\x93\x40\x00\x00\x0f"
    "\x79\xd0\x48\xc7\xc2\x1c\x48\x00\x00\x48\xc7\xc0\x93\x40\x00\x00\x0f\x79"
    "\xd0\x48\xc7\xc2\x1e\x48\x00\x00\x48\xc7\xc0\x93\x40\x00\x00\x0f\x79\xd0"
    "\x48\xc7\xc2\x20\x48\x00\x00\x48\xc7\xc0\x82\x00\x00\x00\x0f\x79\xd0\x48"
    "\xc7\xc2\x22\x48\x00\x00\x48\xc7\xc0\x8b\x00\x00\x00\x0f\x79\xd0\x48\xc7"
    "\xc2\x1c\x68\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2"
    "\x1e\x68\x00\x00\x48\xc7\xc0\x00\x91\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x20"
    "\x68\x00\x00\x48\xc7\xc0\x02\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x06\x28"
    "\x00\x00\x48\xc7\xc0\x00\x05\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x0a\x28\x00"
    "\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x0c\x28\x00\x00"
    "\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x0e\x28\x00\x00\x48"
    "\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x10\x28\x00\x00\x48\xc7"
    "\xc0\x00\x00\x00\x00\x0f\x79\xd0\x0f\x20\xc0\x48\xc7\xc2\x00\x68\x00\x00"
    "\x48\x89\xc0\x0f\x79\xd0\x0f\x20\xd8\x48\xc7\xc2\x02\x68\x00\x00\x48\x89"
    "\xc0\x0f\x79\xd0\x0f\x20\xe0\x48\xc7\xc2\x04\x68\x00\x00\x48\x89\xc0\x0f"
    "\x79\xd0\x48\xc7\xc0\x18\x5f\x00\x00\x48\x8b\x10\x48\xc7\xc0\x20\x5f\x00"
    "\x00\x48\x8b\x08\x48\x31\xc0\x0f\x78\xd0\x48\x31\xc8\x0f\x79\xd0\x0f\x01"
    "\xc2\x48\xc7\xc2\x00\x44\x00\x00\x0f\x78\xd0\xf4";
const char kvm_asm64_vm_exit[] =
    "\x48\xc7\xc3\x00\x44\x00\x00\x0f\x78\xda\x48\xc7\xc3\x02\x44\x00\x00\x0f"
    "\x78\xd9\x48\xc7\xc0\x00\x64\x00\x00\x0f\x78\xc0\x48\xc7\xc3\x1e\x68\x00"
    "\x00\x0f\x78\xdb\xf4";
const char kvm_asm64_cpl3[] =
    "\x0f\x20\xc0\x0d\x00\x00\x00\x80\x0f\x22\xc0\xea\xde\xc0\xad\x0b\x50\x00"
    "\x48\xc7\xc0\xd8\x00\x00\x00\x0f\x00\xd8\x48\xc7\xc0\x6b\x00\x00\x00\x8e"
    "\xd8\x8e\xc0\x8e\xe0\x8e\xe8\x48\xc7\xc4\x80\x0f\x00\x00\x48\xc7\x04\x24"
    "\x1d\xba\x00\x00\x48\xc7\x44\x24\x04\x63\x00\x00\x00\x48\xc7\x44\x24\x08"
    "\x80\x0f\x00\x00\x48\xc7\x44\x24\x0c\x6b\x00\x00\x00\xcb";

#define KVM_SMI _IO(KVMIO, 0xb7)

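// Guest-physical memory map of a SYZOS VM: each entry is a GPA, a size in
// pages, and flags controlling how the backing memslot is created
// (read-only, dirty logging, no host memory, or "take the remaining pages").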
static const struct mem_region syzos_mem_regions[] = {
    {X86_SYZOS_ADDR_ZERO, 5, MEM_REGION_FLAG_GPA0},
    {X86_SYZOS_ADDR_VAR_IDT, 10, 0},
    {X86_SYZOS_ADDR_BOOT_ARGS, 1, 0},
    {X86_SYZOS_ADDR_PT_POOL, X86_SYZOS_PT_POOL_SIZE, 0},
    {X86_SYZOS_ADDR_GLOBALS, 1, 0},
    {X86_SYZOS_ADDR_SMRAM, 10, 0},
    {X86_SYZOS_ADDR_EXIT, 1, MEM_REGION_FLAG_NO_HOST_MEM},
    {X86_SYZOS_ADDR_DIRTY_PAGES, 2, MEM_REGION_FLAG_DIRTY_LOG},
    {X86_SYZOS_ADDR_USER_CODE, KVM_MAX_VCPU,
     MEM_REGION_FLAG_READONLY | MEM_REGION_FLAG_USER_CODE},
    {SYZOS_ADDR_EXECUTOR_CODE, 4,
     MEM_REGION_FLAG_READONLY | MEM_REGION_FLAG_EXECUTOR_CODE},
    {X86_SYZOS_ADDR_SCRATCH_CODE, 1, 0},
    {X86_SYZOS_ADDR_STACK_BOTTOM, 1, 0},
    {X86_SYZOS_PER_VCPU_REGIONS_BASE,
     (KVM_MAX_VCPU * X86_SYZOS_L1_VCPU_REGION_SIZE) / KVM_PAGE_SIZE, 0},
    {X86_SYZOS_ADDR_IOAPIC, 1, 0},
    {X86_SYZOS_ADDR_UNUSED, 0, MEM_REGION_FLAG_REMAINING},
};

#define SYZOS_REGION_COUNT                                                     \
  (sizeof(syzos_mem_regions) / sizeof(syzos_mem_regions[0]))

struct kvm_syz_vm {
  int vmfd;
  int next_cpu_id;
  void* host_mem;
  size_t total_pages;
  void* user_text;
  void* gpa0_mem;
  void* pt_pool_mem;
  void* globals_mem;
  void* region_base[SYZOS_REGION_COUNT];
};

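// Translates a SYZOS guest-physical address to the host virtual address of
// its backing memory by scanning the region table; returns NULL for
// unbacked GPAs.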
static inline void* gpa_to_hva(struct kvm_syz_vm* vm, uint64_t gpa)
{
  for (size_t i = 0; i < SYZOS_REGION_COUNT; i++) {
    const struct mem_region* r = &syzos_mem_regions[i];
    if (r->flags & MEM_REGION_FLAG_NO_HOST_MEM)
      continue;
    if (r->gpa == X86_SYZOS_ADDR_UNUSED)
      break;
    size_t region_size = r->pages * KVM_PAGE_SIZE;
    if (gpa >= r->gpa && gpa < r->gpa + region_size)
      return (void*)((char*)vm->region_base[i] + (gpa - r->gpa));
  }
  return NULL;
}

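// Builds a flat 256-entry IDT in guest memory with every vector routed to
// dummy_null_handler (64-bit interrupt gate, DPL 0).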
#define X86_NUM_IDT_ENTRIES 256
static void syzos_setup_idt(struct kvm_syz_vm* vm, struct kvm_sregs* sregs)
{
  sregs->idt.base = X86_SYZOS_ADDR_VAR_IDT;
  sregs->idt.limit = (X86_NUM_IDT_ENTRIES * sizeof(struct idt_entry_64)) - 1;
  volatile struct idt_entry_64* idt =
      (volatile struct idt_entry_64*)(uint64_t)gpa_to_hva(vm, sregs->idt.base);
  uint64_t handler_addr = executor_fn_guest_addr(dummy_null_handler);
  for (int i = 0; i < X86_NUM_IDT_ENTRIES; i++) {
    idt[i].offset_low = (uint16_t)(handler_addr & 0xFFFF);
    idt[i].selector = X86_SYZOS_SEL_CODE;
    idt[i].ist = 0;
    idt[i].type_attr = 0x8E;
    idt[i].offset_mid = (uint16_t)((handler_addr >> 16) & 0xFFFF);
    idt[i].offset_high = (uint32_t)((handler_addr >> 32) & 0xFFFFFFFF);
    idt[i].reserved = 0;
  }
}

struct kvm_text {
  uintptr_t typ;
  const void* text;
  uintptr_t size;
};

#define PAGE_MASK GENMASK_ULL(51, 12)

typedef struct {
  uint64_t next_page;
  uint64_t last_page;
} page_alloc_t;

static uint64_t pg_alloc(page_alloc_t* alloc)
{
  if (alloc->next_page >= alloc->last_page)
    exit(1);
  uint64_t page = alloc->next_page;
  alloc->next_page += KVM_PAGE_SIZE;
  return page;
}
static uint64_t* get_host_pte_ptr(struct kvm_syz_vm* vm, uint64_t gpa)
{
  if (gpa >= X86_SYZOS_ADDR_PT_POOL &&
      gpa < X86_SYZOS_ADDR_PT_POOL + (X86_SYZOS_PT_POOL_SIZE * KVM_PAGE_SIZE)) {
    uint64_t offset = gpa - X86_SYZOS_ADDR_PT_POOL;
    return (uint64_t*)((char*)vm->pt_pool_mem + offset);
  }
  return (uint64_t*)((char*)vm->gpa0_mem + gpa);
}

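// Identity-maps a 4 KiB guest page by walking (and creating, as needed) the
// four-level page tables rooted at X86_SYZOS_ADDR_PML4, allocating
// intermediate tables from the PT pool.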
static void map_4k_page(struct kvm_syz_vm* vm, page_alloc_t* alloc,
                        uint64_t gpa)
{
  uint64_t* pml4 = (uint64_t*)((char*)vm->gpa0_mem + X86_SYZOS_ADDR_PML4);
  uint64_t pml4_idx = (gpa >> 39) & 0x1FF;
  if (pml4[pml4_idx] == 0)
    pml4[pml4_idx] = X86_PDE64_PRESENT | X86_PDE64_RW | pg_alloc(alloc);
  uint64_t* pdpt = get_host_pte_ptr(vm, pml4[pml4_idx] & PAGE_MASK);
  uint64_t pdpt_idx = (gpa >> 30) & 0x1FF;
  if (pdpt[pdpt_idx] == 0)
    pdpt[pdpt_idx] = X86_PDE64_PRESENT | X86_PDE64_RW | pg_alloc(alloc);
  uint64_t* pd = get_host_pte_ptr(vm, pdpt[pdpt_idx] & PAGE_MASK);
  uint64_t pd_idx = (gpa >> 21) & 0x1FF;
  if (pd[pd_idx] == 0)
    pd[pd_idx] = X86_PDE64_PRESENT | X86_PDE64_RW | pg_alloc(alloc);
  uint64_t* pt = get_host_pte_ptr(vm, pd[pd_idx] & PAGE_MASK);
  uint64_t pt_idx = (gpa >> 12) & 0x1FF;
  pt[pt_idx] = (gpa & PAGE_MASK) | X86_PDE64_PRESENT | X86_PDE64_RW;
}

static int map_4k_region(struct kvm_syz_vm* vm, page_alloc_t* alloc,
                         uint64_t gpa_start, int num_pages)
{
  for (int i = 0; i < num_pages; i++)
    map_4k_page(vm, alloc, gpa_start + (i * KVM_PAGE_SIZE));
  return num_pages;
}
static void setup_pg_table(struct kvm_syz_vm* vm)
{
  int total = vm->total_pages;
  page_alloc_t alloc = {.next_page = X86_SYZOS_ADDR_PT_POOL,
                        .last_page = X86_SYZOS_ADDR_PT_POOL +
                                     X86_SYZOS_PT_POOL_SIZE * KVM_PAGE_SIZE};
  memset(vm->pt_pool_mem, 0, X86_SYZOS_PT_POOL_SIZE * KVM_PAGE_SIZE);
  memset(vm->gpa0_mem, 0, 5 * KVM_PAGE_SIZE);
  for (size_t i = 0; i < SYZOS_REGION_COUNT; i++) {
    int pages = syzos_mem_regions[i].pages;
    if (syzos_mem_regions[i].flags & MEM_REGION_FLAG_REMAINING) {
      if (total < 0)
        exit(1);
      pages = total;
    }
    map_4k_region(vm, &alloc, syzos_mem_regions[i].gpa, pages);
    if (!(syzos_mem_regions[i].flags & MEM_REGION_FLAG_NO_HOST_MEM))
      total -= pages;
    if (syzos_mem_regions[i].flags & MEM_REGION_FLAG_REMAINING)
      break;
  }
}
struct gdt_entry {
  uint16_t limit_low;
  uint16_t base_low;
  uint8_t base_mid;
  uint8_t access;
  uint8_t limit_high_and_flags;
  uint8_t base_high;
} __attribute__((packed));

static void setup_gdt_64(struct gdt_entry* gdt)
{
  gdt[0] = (struct gdt_entry){0};
  gdt[X86_SYZOS_SEL_CODE >> 3] =
      (struct gdt_entry){.limit_low = 0xFFFF,
                         .base_low = 0,
                         .base_mid = 0,
                         .access = 0x9A,
                         .limit_high_and_flags = 0xAF,
                         .base_high = 0};
  gdt[X86_SYZOS_SEL_DATA >> 3] =
      (struct gdt_entry){.limit_low = 0xFFFF,
                         .base_low = 0,
                         .base_mid = 0,
                         .access = 0x92,
                         .limit_high_and_flags = 0xCF,
                         .base_high = 0};
  gdt[X86_SYZOS_SEL_TSS64 >> 3] = (struct gdt_entry){
      .limit_low = 0x67,
      .base_low = (uint16_t)(X86_SYZOS_ADDR_VAR_TSS & 0xFFFF),
      .base_mid = (uint8_t)((X86_SYZOS_ADDR_VAR_TSS >> 16) & 0xFF),
      .access = SVM_ATTR_TSS_BUSY,
      .limit_high_and_flags = 0,
      .base_high = (uint8_t)((X86_SYZOS_ADDR_VAR_TSS >> 24) & 0xFF)};
  gdt[(X86_SYZOS_SEL_TSS64 >> 3) + 1] = (struct gdt_entry){
      .limit_low = (uint16_t)((uint64_t)X86_SYZOS_ADDR_VAR_TSS >> 32),
      .base_low = (uint16_t)((uint64_t)X86_SYZOS_ADDR_VAR_TSS >> 48),
      .base_mid = 0,
      .access = 0,
      .limit_high_and_flags = 0,
      .base_high = 0};
}

static void get_cpuid(uint32_t eax, uint32_t ecx, uint32_t* a, uint32_t* b,
                      uint32_t* c, uint32_t* d)
{
  *a = *b = *c = *d = 0;
  asm volatile("cpuid"
               : "=a"(*a), "=b"(*b), "=c"(*c), "=d"(*d)
               : "a"(eax), "c"(ecx));
}
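// Switches the vCPU straight into 64-bit long mode: builds the GDT, TSS, IDT
// and page tables in guest memory, installs flat 64-bit code/data segments,
// and enables paging, PAE, LME/LMA and NXE (plus SVME and a zeroed host-save
// page when the host CPU is AMD).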
static void setup_gdt_ldt_pg(struct kvm_syz_vm* vm, int cpufd, int cpu_id)
{
  struct kvm_sregs sregs;
  ioctl(cpufd, KVM_GET_SREGS, &sregs);
  sregs.gdt.base = X86_SYZOS_ADDR_GDT;
  sregs.gdt.limit = 5 * sizeof(struct gdt_entry) - 1;
  struct gdt_entry* gdt =
      (struct gdt_entry*)(uint64_t)gpa_to_hva(vm, sregs.gdt.base);
  struct kvm_segment seg_cs64;
  memset(&seg_cs64, 0, sizeof(seg_cs64));
  seg_cs64.selector = X86_SYZOS_SEL_CODE;
  seg_cs64.type = 11;
  seg_cs64.base = 0;
  seg_cs64.limit = 0xFFFFFFFFu;
  seg_cs64.present = 1;
  seg_cs64.s = 1;
  seg_cs64.g = 1;
  seg_cs64.l = 1;
  sregs.cs = seg_cs64;
  struct kvm_segment seg_ds64;
  memset(&seg_ds64, 0, sizeof(struct kvm_segment));
  seg_ds64.selector = X86_SYZOS_SEL_DATA;
  seg_ds64.type = 3;
  seg_ds64.limit = 0xFFFFFFFFu;
  seg_ds64.present = 1;
  seg_ds64.s = 1;
  seg_ds64.g = 1;
  seg_ds64.db = 1;
  sregs.ds = seg_ds64;
  sregs.es = seg_ds64;
  sregs.fs = seg_ds64;
  sregs.gs = seg_ds64;
  sregs.ss = seg_ds64;
  struct kvm_segment seg_tr;
  memset(&seg_tr, 0, sizeof(seg_tr));
  seg_tr.selector = X86_SYZOS_SEL_TSS64;
  seg_tr.type = 11;
  seg_tr.base = X86_SYZOS_ADDR_VAR_TSS;
  seg_tr.limit = 0x67;
  seg_tr.present = 1;
  seg_tr.s = 0;
  sregs.tr = seg_tr;
  volatile uint8_t* l1_tss =
      (volatile uint8_t*)(uint64_t)gpa_to_hva(vm, X86_SYZOS_ADDR_VAR_TSS);
  memset((void*)l1_tss, 0, 104);
  *(volatile uint64_t*)(l1_tss + 4) = X86_SYZOS_ADDR_STACK0;
  setup_pg_table(vm);
  setup_gdt_64(gdt);
  syzos_setup_idt(vm, &sregs);
  sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG;
  sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR;
  sregs.efer |= (X86_EFER_LME | X86_EFER_LMA | X86_EFER_NXE);
  uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
  get_cpuid(0, 0, &eax, &ebx, &ecx, &edx);
  if (ebx == 0x68747541 && edx == 0x69746e65 && ecx == 0x444d4163) {
    sregs.efer |= X86_EFER_SVME;
    void* hsave_host = (void*)(uint64_t)gpa_to_hva(
        vm, X86_SYZOS_ADDR_VM_ARCH_SPECIFIC(cpu_id));
    memset(hsave_host, 0, KVM_PAGE_SIZE);
  }
  sregs.cr3 = X86_ADDR_PML4;
  ioctl(cpufd, KVM_SET_SREGS, &sregs);
}

static void setup_cpuid(int cpufd)
{
  int kvmfd = open("/dev/kvm", O_RDWR);
  char buf[sizeof(struct kvm_cpuid2) + 128 * sizeof(struct kvm_cpuid_entry2)];
  memset(buf, 0, sizeof(buf));
  struct kvm_cpuid2* cpuid = (struct kvm_cpuid2*)buf;
  cpuid->nent = 128;
  ioctl(kvmfd, KVM_GET_SUPPORTED_CPUID, cpuid);
  ioctl(cpufd, KVM_SET_CPUID2, cpuid);
  close(kvmfd);
}

#define RFLAGS_1_BIT (1ULL << 1)
#define RFLAGS_IF_BIT (1ULL << 9)

static void reset_cpu_regs(int cpufd, uint64_t rip, uint64_t cpu_id)
{
  struct kvm_regs regs;
  memset(&regs, 0, sizeof(regs));
  regs.rflags |= RFLAGS_1_BIT | RFLAGS_IF_BIT;
  regs.rip = rip;
  regs.rsp = X86_SYZOS_ADDR_STACK0;
  regs.rdi = cpu_id;
  ioctl(cpufd, KVM_SET_REGS, &regs);
}

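// Copies the per-vCPU user program into its read-only code page, performs
// the long-mode and CPUID setup, and resets the registers so the vCPU enters
// guest_main() with RDI = cpu_id.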
static void install_user_code(struct kvm_syz_vm* vm, int cpufd, int cpu_id,
                              const void* text, size_t text_size)
{
  if ((cpu_id < 0) || (cpu_id >= KVM_MAX_VCPU))
    return;
  if (text_size > KVM_PAGE_SIZE)
    text_size = KVM_PAGE_SIZE;
  void* target = (void*)((uint64_t)vm->user_text + (KVM_PAGE_SIZE * cpu_id));
  memcpy(target, text, text_size);
  setup_gdt_ldt_pg(vm, cpufd, cpu_id);
  setup_cpuid(cpufd);
  uint64_t entry_rip = executor_fn_guest_addr(guest_main);
  reset_cpu_regs(cpufd, entry_rip, cpu_id);
  if (vm->globals_mem) {
    struct syzos_globals* globals = (struct syzos_globals*)vm->globals_mem;
    globals->text_sizes[cpu_id] = text_size;
  }
}

struct addr_size {
  void* addr;
  size_t size;
};

static struct addr_size alloc_guest_mem(struct addr_size* free, size_t size)
{
  struct addr_size ret = {.addr = NULL, .size = 0};
  if (free->size < size)
    return ret;
  ret.addr = free->addr;
  ret.size = size;
  free->addr = (void*)((char*)free->addr + size);
  free->size -= size;
  return ret;
}
static void vm_set_user_memory_region(int vmfd, uint32_t slot, uint32_t flags,
                                      uint64_t guest_phys_addr,
                                      uint64_t memory_size,
                                      uint64_t userspace_addr)
{
  struct kvm_userspace_memory_region memreg;
  memreg.slot = slot;
  memreg.flags = flags;
  memreg.guest_phys_addr = guest_phys_addr;
  memreg.memory_size = memory_size;
  memreg.userspace_addr = userspace_addr;
  ioctl(vmfd, KVM_SET_USER_MEMORY_REGION, &memreg);
}

static void install_syzos_code(void* host_mem, size_t mem_size)
{
  size_t size = (char*)&__stop_guest - (char*)&__start_guest;
  if (size > mem_size)
    exit(1);
  memcpy(host_mem, &__start_guest, size);
}

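// Carves the user-supplied memory into the regions from syzos_mem_regions,
// registers each one as a KVM memslot with the requested flags, copies the
// executor's guest code into the EXECUTOR_CODE region, and publishes the
// final layout to the guest through the boot-args page.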
static void setup_vm(int vmfd, struct kvm_syz_vm* vm)
{
  struct addr_size allocator = {.addr = vm->host_mem,
                                .size = vm->total_pages * KVM_PAGE_SIZE};
  int slot = 0;
  struct syzos_boot_args* boot_args = NULL;
  for (size_t i = 0; i < SYZOS_REGION_COUNT; i++) {
    const struct mem_region* r = &syzos_mem_regions[i];
    if (r->flags & MEM_REGION_FLAG_NO_HOST_MEM) {
      vm->region_base[i] = NULL;
      continue;
    }
    size_t pages = r->pages;
    if (r->flags & MEM_REGION_FLAG_REMAINING)
      pages = allocator.size / KVM_PAGE_SIZE;
    struct addr_size next = alloc_guest_mem(&allocator, pages * KVM_PAGE_SIZE);
    vm->region_base[i] = next.addr;
    uint32_t flags = 0;
    if (r->flags & MEM_REGION_FLAG_DIRTY_LOG)
      flags |= KVM_MEM_LOG_DIRTY_PAGES;
    if (r->flags & MEM_REGION_FLAG_READONLY)
      flags |= KVM_MEM_READONLY;
    if (r->flags & MEM_REGION_FLAG_USER_CODE)
      vm->user_text = next.addr;
    if (r->flags & MEM_REGION_FLAG_GPA0)
      vm->gpa0_mem = next.addr;
    if (r->gpa == X86_SYZOS_ADDR_PT_POOL)
      vm->pt_pool_mem = next.addr;
    if (r->gpa == X86_SYZOS_ADDR_GLOBALS)
      vm->globals_mem = next.addr;
    if (r->gpa == X86_SYZOS_ADDR_BOOT_ARGS) {
      boot_args = (struct syzos_boot_args*)next.addr;
      boot_args->region_count = SYZOS_REGION_COUNT;
      for (size_t k = 0; k < boot_args->region_count; k++)
        boot_args->regions[k] = syzos_mem_regions[k];
    }
    if ((r->flags & MEM_REGION_FLAG_REMAINING) && boot_args)
      boot_args->regions[i].pages = pages;
    if (r->flags & MEM_REGION_FLAG_EXECUTOR_CODE)
      install_syzos_code(next.addr, next.size);
    vm_set_user_memory_region(vmfd, slot++, flags, r->gpa, next.size,
                              (uintptr_t)next.addr);
    if (r->flags & MEM_REGION_FLAG_REMAINING)
      break;
  }
}

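// Pseudo-syscall backing syz_kvm_setup_syzos_vm$x86: the first page of the
// caller's VMA holds the kvm_syz_vm bookkeeping struct, the remaining pages
// back guest memory.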
static long syz_kvm_setup_syzos_vm(volatile long a0, volatile long a1)
{
  const int vmfd = a0;
  void* host_mem = (void*)a1;
  struct kvm_syz_vm* ret = (struct kvm_syz_vm*)host_mem;
  ret->host_mem = (void*)((uint64_t)host_mem + KVM_PAGE_SIZE);
  ret->total_pages = KVM_GUEST_PAGES - 1;
  setup_vm(vmfd, ret);
  ret->vmfd = vmfd;
  ret->next_cpu_id = 0;
  return (long)ret;
}

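// Pseudo-syscall backing syz_kvm_add_vcpu$x86: creates the next vCPU id on
// the VM, installs the supplied guest text, and returns the vCPU fd.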
static long syz_kvm_add_vcpu(volatile long a0, volatile long a1)
{
  struct kvm_syz_vm* vm = (struct kvm_syz_vm*)a0;
  struct kvm_text* utext = (struct kvm_text*)a1;
  const void* text = utext->text;
  size_t text_size = utext->size;
  if (!vm) {
    errno = EINVAL;
    return -1;
  }
  if (vm->next_cpu_id == KVM_MAX_VCPU) {
    errno = ENOMEM;
    return -1;
  }
  int cpu_id = vm->next_cpu_id;
  int cpufd = ioctl(vm->vmfd, KVM_CREATE_VCPU, cpu_id);
  if (cpufd == -1)
    return -1;
  vm->next_cpu_id++;
  install_user_code(vm, cpufd, cpu_id, text, text_size);
  return cpufd;
}

static void kill_and_wait(int pid, int* status)
{
  kill(-pid, SIGKILL);
  kill(pid, SIGKILL);
  for (int i = 0; i < 100; i++) {
    if (waitpid(-1, status, WNOHANG | __WALL) == pid)
      return;
    usleep(1000);
  }
  DIR* dir = opendir("/sys/fs/fuse/connections");
  if (dir) {
    for (;;) {
      struct dirent* ent = readdir(dir);
      if (!ent)
        break;
      if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0)
        continue;
      char abort[300];
      snprintf(abort, sizeof(abort), "/sys/fs/fuse/connections/%s/abort",
               ent->d_name);
      int fd = open(abort, O_WRONLY);
      if (fd == -1) {
        continue;
      }
      if (write(fd, abort, 1) < 0) {
      }
      close(fd);
    }
    closedir(dir);
  } else {
  }
  while (waitpid(-1, status, __WALL) != pid) {
  }
}

static void setup_test()
{
  prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
  setpgrp();
  write_file("/proc/self/oom_score_adj", "1000");
}

struct thread_t {
  int created, call;
  event_t ready, done;
};

static struct thread_t threads[16];
static void execute_call(int call);
static int running;

static void* thr(void* arg)
{
  struct thread_t* th = (struct thread_t*)arg;
  for (;;) {
    event_wait(&th->ready);
    event_reset(&th->ready);
    execute_call(th->call);
    __atomic_fetch_sub(&running, 1, __ATOMIC_RELAXED);
    event_set(&th->done);
  }
  return 0;
}

static void execute_one(void)
{
  if (write(1, "executing program\n", sizeof("executing program\n") - 1)) {
  }
  int i, call, thread;
  for (call = 0; call < 10; call++) {
    for (thread = 0; thread < (int)(sizeof(threads) / sizeof(threads[0]));
         thread++) {
      struct thread_t* th = &threads[thread];
      if (!th->created) {
        th->created = 1;
        event_init(&th->ready);
        event_init(&th->done);
        event_set(&th->done);
        thread_start(thr, th);
      }
      if (!event_isset(&th->done))
        continue;
      event_reset(&th->done);
      th->call = call;
      __atomic_fetch_add(&running, 1, __ATOMIC_RELAXED);
      event_set(&th->ready);
      event_timedwait(&th->done, 50);
      break;
    }
  }
  for (i = 0; i < 100 && __atomic_load_n(&running, __ATOMIC_RELAXED); i++)
    sleep_ms(1);
}

static void execute_one(void);

#define WAIT_FLAGS __WALL

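// Top-level fuzzing loop: fork a fresh child per iteration, let it run
// execute_one() (which farms the ten calls below out to worker threads with
// a 50 ms per-call timeout, so slow calls keep running concurrently), and
// kill_and_wait() the child if it runs longer than 5 seconds.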
static void loop(void)
{
  int iter = 0;
  for (;; iter++) {
    int pid = fork();
    if (pid < 0)
      exit(1);
    if (pid == 0) {
      setup_test();
      execute_one();
      exit(0);
    }
    int status = 0;
    uint64_t start = current_time_ms();
    for (;;) {
      sleep_ms(10);
      if (waitpid(-1, &status, WNOHANG | WAIT_FLAGS) == pid)
        break;
      if (current_time_ms() - start < 5000)
        continue;
      kill_and_wait(pid, &status);
      break;
    }
  }
}

uint64_t r[5] = {0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff,
                 0x0, 0xffffffffffffffff};

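// The ten fuzzed calls, annotated by syzkaller. In short: open /dev/kvm,
// create a VM, create a bare vCPU (id 2), set up a SYZOS VM and add a vCPU
// whose guest program writes the Hyper-V GUEST_OS_ID (0x40000000) and
// HYPERCALL (0x40000001) MSRs, set a Hyper-V CPUID leaf and run it, and
// enable KVM_CAP_HYPERV_ENFORCE_CPUID on the bare vCPU. The calls run on
// separate worker threads, so they can overlap in time.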
void execute_call(int call)
{
  intptr_t res = 0;
  switch (call) {
  case 0:
    //  openat$kvm arguments: [
    //    fd: const = 0xffffffffffffff9c (8 bytes)
    //    file: ptr[in, buffer] {
    //      buffer: {2f 64 65 76 2f 6b 76 6d 00} (length 0x9)
    //    }
    //    flags: open_flags = 0x0 (4 bytes)
    //    mode: const = 0x0 (2 bytes)
    //  ]
    //  returns fd_kvm
    memcpy((void*)0x200000000000, "/dev/kvm\000", 9);
    res = syscall(__NR_openat, /*fd=*/0xffffffffffffff9cul,
                  /*file=*/0x200000000000ul, /*flags=*/0, /*mode=*/0);
    if (res != -1)
      r[0] = res;
    break;
  case 1:
    //  ioctl$KVM_CREATE_VM arguments: [
    //    fd: fd_kvm (resource)
    //    cmd: const = 0xae01 (4 bytes)
    //    type: intptr = 0x0 (8 bytes)
    //  ]
    //  returns fd_kvmvm
    res = syscall(__NR_ioctl, /*fd=*/r[0], /*cmd=*/0xae01, /*type=*/0ul);
    if (res != -1)
      r[1] = res;
    break;
  case 2:
    //  ioctl$KVM_CREATE_VCPU arguments: [
    //    fd: fd_kvmvm (resource)
    //    cmd: const = 0xae41 (4 bytes)
    //    id: intptr = 0x2 (8 bytes)
    //  ]
    //  returns fd_kvmcpu
    res = syscall(__NR_ioctl, /*fd=*/r[1], /*cmd=*/0xae41, /*id=*/2ul);
    if (res != -1)
      r[2] = res;
    break;
  case 3:
    //  syz_kvm_setup_syzos_vm$x86 arguments: [
    //    fd: fd_kvmvm (resource)
    //    usermem: VMA[0x400000]
    //  ]
    //  returns kvm_syz_vm$x86
    res = -1;
    res = syz_kvm_setup_syzos_vm(/*fd=*/r[1], /*usermem=*/0x200000c00000);
    if (res != -1)
      r[3] = res;
    break;
  case 4:
    //  syz_kvm_add_vcpu$x86 arguments: [
    //    vm: kvm_syz_vm$x86 (resource)
    //    text: ptr[in, kvm_text$x86] {
    //      kvm_text$x86 {
    //        typ: const = 0x0 (8 bytes)
    //        text: ptr[in, array[syzos_api_call$x86]] {
    //          array[syzos_api_call$x86] {
    //            union syzos_api_call$x86 {
    //              wrmsr: syzos_api$x86[101, syzos_api_wrmsr] {
    //                call: const = 0x65 (8 bytes)
    //                size: bytesize = 0x20 (8 bytes)
    //                payload: syzos_api_wrmsr {
    //                  arg_reg: msr_index = 0x40000000 (8 bytes)
    //                  arg_value: int64 = 0x3 (8 bytes)
    //                }
    //              }
    //            }
    //            union syzos_api_call$x86 {
    //              wrmsr: syzos_api$x86[101, syzos_api_wrmsr] {
    //                call: const = 0x65 (8 bytes)
    //                size: bytesize = 0x20 (8 bytes)
    //                payload: syzos_api_wrmsr {
    //                  arg_reg: msr_index = 0x40000001 (8 bytes)
    //                  arg_value: int64 = 0x3f (8 bytes)
    //                }
    //              }
    //            }
    //          }
    //        }
    //        size: bytesize = 0x40 (8 bytes)
    //      }
    //    }
    //  ]
    //  returns fd_kvmcpu
    *(uint64_t*)0x200000000080 = 0;
    *(uint64_t*)0x200000000088 = 0x2000000001c0;
    *(uint64_t*)0x2000000001c0 = 0x65;
    *(uint64_t*)0x2000000001c8 = 0x20;
    *(uint64_t*)0x2000000001d0 = 0x40000000;
    *(uint64_t*)0x2000000001d8 = 3;
    *(uint64_t*)0x2000000001e0 = 0x65;
    *(uint64_t*)0x2000000001e8 = 0x20;
    *(uint64_t*)0x2000000001f0 = 0x40000001;
    *(uint64_t*)0x2000000001f8 = 0x3f;
    *(uint64_t*)0x200000000090 = 0x40;
    res = -1;
    res = syz_kvm_add_vcpu(/*vm=*/r[3], /*text=*/0x200000000080);
    if (res != -1)
      r[4] = res;
    break;
  case 5:
    //  ioctl$KVM_SET_CPUID2 arguments: [
    //    fd: fd_kvmcpu (resource)
    //    cmd: const = 0x4008ae90 (4 bytes)
    //    arg: ptr[in, kvm_cpuid2] {
    //      kvm_cpuid2 {
    //        n: len = 0x1 (4 bytes)
    //        pad: const = 0x0 (4 bytes)
    //        entries: array[kvm_cpuid_entry2] {
    //          kvm_cpuid_entry2 {
    //            func: kvm_cpu_function = 0x40000001 (4 bytes)
    //            index: int32 = 0x4 (4 bytes)
    //            flags: kvm_cpuid_flags = 0x2 (4 bytes)
    //            eax: int32 = 0x31237648 (4 bytes)
    //            ebx: int32 = 0x6 (4 bytes)
    //            ecx: int32 = 0x2 (4 bytes)
    //            edx: int32 = 0x80 (4 bytes)
    //            pad: buffer: {00 00 00 00 00 00 00 00 00 00 00 00} (length
    //            0xc)
    //          }
    //        }
    //      }
    //    }
    //  ]
    *(uint32_t*)0x200000000000 = 1;
    *(uint32_t*)0x200000000004 = 0;
    *(uint32_t*)0x200000000008 = 0x40000001;
    *(uint32_t*)0x20000000000c = 4;
    *(uint32_t*)0x200000000010 = 2;
    *(uint32_t*)0x200000000014 = 0x31237648;
    *(uint32_t*)0x200000000018 = 6;
    *(uint32_t*)0x20000000001c = 2;
    *(uint32_t*)0x200000000020 = 0x80;
    memset((void*)0x200000000024, 0, 12);
    syscall(__NR_ioctl, /*fd=*/r[4], /*cmd=*/0x4008ae90,
            /*arg=*/0x200000000000ul);
    break;
  case 6:
    //  ioctl$KVM_RUN arguments: [
    //    fd: fd_kvmcpu (resource)
    //    cmd: const = 0xae80 (4 bytes)
    //    arg: const = 0x0 (8 bytes)
    //  ]
    syscall(__NR_ioctl, /*fd=*/r[4], /*cmd=*/0xae80, /*arg=*/0ul);
    break;
  case 7:
    //  ioctl$KVM_SET_REGS arguments: [
    //    fd: fd_kvmcpu (resource)
    //    cmd: const = 0x4090ae82 (4 bytes)
    //    arg: ptr[in, kvm_regs] {
    //      kvm_regs {
    //        gp: array[int64] {
    //          int64 = 0x98f2 (8 bytes)
    //          int64 = 0x7fffffffffffffff (8 bytes)
    //          int64 = 0x2 (8 bytes)
    //          int64 = 0x1 (8 bytes)
    //          int64 = 0x9 (8 bytes)
    //          int64 = 0x4 (8 bytes)
    //          int64 = 0x1ff (8 bytes)
    //          int64 = 0x8000000000000001 (8 bytes)
    //          int64 = 0x4 (8 bytes)
    //          int64 = 0x8000000000000000 (8 bytes)
    //          int64 = 0x1 (8 bytes)
    //          int64 = 0xffffffffffff986b (8 bytes)
    //          int64 = 0x3 (8 bytes)
    //          int64 = 0x5 (8 bytes)
    //          int64 = 0xc0000000000 (8 bytes)
    //          int64 = 0x1 (8 bytes)
    //        }
    //        rip: kvm_guest_addrs = 0x0 (8 bytes)
    //        rflags: kvm_x86_rflags = 0x110900 (8 bytes)
    //      }
    //    }
    //  ]
    *(uint64_t*)0x200000000200 = 0x98f2;
    *(uint64_t*)0x200000000208 = 0x7fffffffffffffff;
    *(uint64_t*)0x200000000210 = 2;
    *(uint64_t*)0x200000000218 = 1;
    *(uint64_t*)0x200000000220 = 9;
    *(uint64_t*)0x200000000228 = 4;
    *(uint64_t*)0x200000000230 = 0x1ff;
    *(uint64_t*)0x200000000238 = 0x8000000000000001;
    *(uint64_t*)0x200000000240 = 4;
    *(uint64_t*)0x200000000248 = 0x8000000000000000;
    *(uint64_t*)0x200000000250 = 1;
    *(uint64_t*)0x200000000258 = 0xffffffffffff986b;
    *(uint64_t*)0x200000000260 = 3;
    *(uint64_t*)0x200000000268 = 5;
    *(uint64_t*)0x200000000270 = 0xc0000000000;
    *(uint64_t*)0x200000000278 = 1;
    *(uint64_t*)0x200000000280 = 0;
    *(uint64_t*)0x200000000288 = 0x110900;
    syscall(__NR_ioctl, /*fd=*/r[4], /*cmd=*/0x4090ae82,
            /*arg=*/0x200000000200ul);
    break;
  case 8:
    //  ioctl$KVM_RUN arguments: [
    //    fd: fd_kvmcpu (resource)
    //    cmd: const = 0xae80 (4 bytes)
    //    arg: const = 0x0 (8 bytes)
    //  ]
    syscall(__NR_ioctl, /*fd=*/r[4], /*cmd=*/0xae80, /*arg=*/0ul);
    break;
  case 9:
    //  ioctl$KVM_CAP_HYPERV_ENFORCE_CPUID arguments: [
    //    fd: fd_kvmcpu (resource)
    //    cmd: const = 0x4068aea3 (4 bytes)
    //    arg: ptr[in, kvm_enable_cap[KVM_CAP_HYPERV_ENFORCE_CPUID, bool64]] {
    //      kvm_enable_cap[KVM_CAP_HYPERV_ENFORCE_CPUID, bool64] {
    //        cap: const = 0xc7 (4 bytes)
    //        flags: const = 0x0 (4 bytes)
    //        args: int64 = 0x1 (8 bytes)
    //        pad = 0x0 (88 bytes)
    //      }
    //    }
    //  ]
    *(uint32_t*)0x2000000000c0 = 0xc7;
    *(uint32_t*)0x2000000000c4 = 0;
    *(uint64_t*)0x2000000000c8 = 1;
    syscall(__NR_ioctl, /*fd=*/r[2], /*cmd=*/0x4068aea3,
            /*arg=*/0x2000000000c0ul);
    break;
  }
}
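// Entry point: maps the fixed address ranges the calls above dereference
// (a guard page, a 16 MiB RWX data area at 0x200000000000, and a trailing
// guard page), then forks six worker processes that each run loop() forever.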
int main(void)
{
  syscall(__NR_mmap, /*addr=*/0x1ffffffff000ul, /*len=*/0x1000ul, /*prot=*/0ul,
          /*flags=MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE*/ 0x32ul,
          /*fd=*/(intptr_t)-1, /*offset=*/0ul);
  syscall(__NR_mmap, /*addr=*/0x200000000000ul, /*len=*/0x1000000ul,
          /*prot=PROT_WRITE|PROT_READ|PROT_EXEC*/ 7ul,
          /*flags=MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE*/ 0x32ul,
          /*fd=*/(intptr_t)-1, /*offset=*/0ul);
  syscall(__NR_mmap, /*addr=*/0x200001000000ul, /*len=*/0x1000ul, /*prot=*/0ul,
          /*flags=MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE*/ 0x32ul,
          /*fd=*/(intptr_t)-1, /*offset=*/0ul);
  const char* reason;
  (void)reason;
  for (procid = 0; procid < 6; procid++) {
    if (fork() == 0) {
      loop();
    }
  }
  sleep(1000000);
  return 0;
}
