All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] x86/hvm: Add Kconfig option to disable nested virtualization
@ 2026-02-06 21:05 Stefano Stabellini
  2026-02-08  2:31 ` Demi Marie Obenour
                   ` (3 more replies)
  0 siblings, 4 replies; 10+ messages in thread
From: Stefano Stabellini @ 2026-02-06 21:05 UTC (permalink / raw)
  To: xen-devel
  Cc: roger.pau, jbeulich, andrew.cooper3, jason.andryuk,
	alejandro.garciavallejo, Stefano Stabellini

Introduce CONFIG_NESTED_VIRT (default y, requires EXPERT to disable)
to allow nested virtualization support to be disabled at build time.
This is useful for embedded or safety-focused deployments where
nested virtualization is not needed, reducing code size and attack
surface.

When CONFIG_NESTED_VIRT=n, the following source files are excluded:
- arch/x86/hvm/nestedhvm.c
- arch/x86/hvm/svm/nestedsvm.c
- arch/x86/hvm/vmx/vvmx.c
- arch/x86/mm/nested.c
- arch/x86/mm/hap/nested_hap.c
- arch/x86/mm/hap/nested_ept.c

Add inline stubs where needed in headers.

No functional change when CONFIG_NESTED_VIRT=y.

Signed-off-by: Stefano Stabellini <stefano.stabellini@amd.com>
---
Changes in v2:
- add ASSERT_UNREACHABLE
- change default to N and remove EXPERT
- don't compile nested_hap if not VMX
- add XEN_SYSCTL_PHYSCAP_nestedhvm
- add IS_ENABLED check in hvm_nested_virt_supported 
---
 xen/arch/x86/hvm/Kconfig                 | 10 ++++
 xen/arch/x86/hvm/Makefile                |  2 +-
 xen/arch/x86/hvm/svm/Makefile            |  2 +-
 xen/arch/x86/hvm/svm/nestedhvm.h         | 60 +++++++++++++++++++++--
 xen/arch/x86/hvm/svm/svm.c               |  6 +++
 xen/arch/x86/hvm/vmx/Makefile            |  2 +-
 xen/arch/x86/hvm/vmx/vmx.c               | 10 +++-
 xen/arch/x86/include/asm/hvm/hvm.h       |  2 +-
 xen/arch/x86/include/asm/hvm/nestedhvm.h | 52 ++++++++++++++++----
 xen/arch/x86/include/asm/hvm/vmx/vvmx.h  | 62 ++++++++++++++++++++++++
 xen/arch/x86/mm/Makefile                 |  2 +-
 xen/arch/x86/mm/hap/Makefile             |  4 +-
 xen/arch/x86/mm/p2m.h                    |  6 +++
 xen/arch/x86/sysctl.c                    |  2 +
 xen/include/public/sysctl.h              |  4 +-
 15 files changed, 202 insertions(+), 24 deletions(-)

diff --git a/xen/arch/x86/hvm/Kconfig b/xen/arch/x86/hvm/Kconfig
index f32bf5cbb7..133f19a063 100644
--- a/xen/arch/x86/hvm/Kconfig
+++ b/xen/arch/x86/hvm/Kconfig
@@ -92,4 +92,14 @@ config MEM_SHARING
 	bool "Xen memory sharing support (UNSUPPORTED)" if UNSUPPORTED
 	depends on INTEL_VMX
 
+config NESTED_VIRT
+	bool "Nested virtualization support"
+	depends on AMD_SVM || INTEL_VMX
+	default n
+	help
+	  Enable nested virtualization, allowing guests to run their own
+	  hypervisors. This requires hardware support.
+
+	  If unsure, say N.
+
 endif
diff --git a/xen/arch/x86/hvm/Makefile b/xen/arch/x86/hvm/Makefile
index f34fb03934..b8a0a68624 100644
--- a/xen/arch/x86/hvm/Makefile
+++ b/xen/arch/x86/hvm/Makefile
@@ -18,7 +18,7 @@ obj-y += irq.o
 obj-y += mmio.o
 obj-$(CONFIG_VM_EVENT) += monitor.o
 obj-y += mtrr.o
-obj-y += nestedhvm.o
+obj-$(CONFIG_NESTED_VIRT) += nestedhvm.o
 obj-y += pmtimer.o
 obj-y += quirks.o
 obj-y += rtc.o
diff --git a/xen/arch/x86/hvm/svm/Makefile b/xen/arch/x86/hvm/svm/Makefile
index 8a072cafd5..92418e3444 100644
--- a/xen/arch/x86/hvm/svm/Makefile
+++ b/xen/arch/x86/hvm/svm/Makefile
@@ -2,6 +2,6 @@ obj-y += asid.o
 obj-y += emulate.o
 obj-bin-y += entry.o
 obj-y += intr.o
-obj-y += nestedsvm.o
+obj-$(CONFIG_NESTED_VIRT) += nestedsvm.o
 obj-y += svm.o
 obj-y += vmcb.o
diff --git a/xen/arch/x86/hvm/svm/nestedhvm.h b/xen/arch/x86/hvm/svm/nestedhvm.h
index 9bfed5ffd7..ed1aa847e5 100644
--- a/xen/arch/x86/hvm/svm/nestedhvm.h
+++ b/xen/arch/x86/hvm/svm/nestedhvm.h
@@ -26,6 +26,13 @@
 #define nsvm_efer_svm_enabled(v) \
     (!!((v)->arch.hvm.guest_efer & EFER_SVME))
 
+#define NSVM_INTR_NOTHANDLED     3
+#define NSVM_INTR_NOTINTERCEPTED 2
+#define NSVM_INTR_FORCEVMEXIT    1
+#define NSVM_INTR_MASKED         0
+
+#ifdef CONFIG_NESTED_VIRT
+
 int nestedsvm_vmcb_map(struct vcpu *v, uint64_t vmcbaddr);
 void nestedsvm_vmexit_defer(struct vcpu *v,
     uint64_t exitcode, uint64_t exitinfo1, uint64_t exitinfo2);
@@ -57,13 +64,56 @@ int cf_check nsvm_hap_walk_L1_p2m(
     struct vcpu *v, paddr_t L2_gpa, paddr_t *L1_gpa, unsigned int *page_order,
     uint8_t *p2m_acc, struct npfec npfec);
 
-#define NSVM_INTR_NOTHANDLED     3
-#define NSVM_INTR_NOTINTERCEPTED 2
-#define NSVM_INTR_FORCEVMEXIT    1
-#define NSVM_INTR_MASKED         0
-
 int nestedsvm_vcpu_interrupt(struct vcpu *v, const struct hvm_intack intack);
 
+#else /* !CONFIG_NESTED_VIRT */
+
+static inline int nestedsvm_vmcb_map(struct vcpu *v, uint64_t vmcbaddr)
+{
+    ASSERT_UNREACHABLE();
+    return -EOPNOTSUPP;
+}
+static inline void nestedsvm_vmexit_defer(struct vcpu *v,
+    uint64_t exitcode, uint64_t exitinfo1, uint64_t exitinfo2)
+{
+    ASSERT_UNREACHABLE();
+}
+static inline enum nestedhvm_vmexits nestedsvm_vmexit_n2n1(struct vcpu *v,
+    struct cpu_user_regs *regs)
+{
+    ASSERT_UNREACHABLE();
+    return NESTEDHVM_VMEXIT_ERROR;
+}
+static inline enum nestedhvm_vmexits nestedsvm_check_intercepts(struct vcpu *v,
+    struct cpu_user_regs *regs, uint64_t exitcode)
+{
+    ASSERT_UNREACHABLE();
+    return NESTEDHVM_VMEXIT_ERROR;
+}
+static inline void svm_nested_features_on_efer_update(struct vcpu *v)
+{
+    ASSERT_UNREACHABLE();
+}
+static inline void svm_vmexit_do_clgi(struct cpu_user_regs *regs,
+                                      struct vcpu *v)
+{
+    ASSERT_UNREACHABLE();
+}
+static inline void svm_vmexit_do_stgi(struct cpu_user_regs *regs,
+                                       struct vcpu *v)
+{
+    ASSERT_UNREACHABLE();
+}
+static inline bool nestedsvm_gif_isset(struct vcpu *v) { return true; }
+static inline int nestedsvm_vcpu_interrupt(struct vcpu *v,
+                                           const struct hvm_intack intack)
+{
+    ASSERT_UNREACHABLE();
+    return NSVM_INTR_NOTINTERCEPTED;
+}
+
+#endif /* CONFIG_NESTED_VIRT */
+
 #endif /* __X86_HVM_SVM_NESTEDHVM_PRIV_H__ */
 
 /*
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index 18ba837738..0234b57afb 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -46,6 +46,10 @@
 
 void noreturn svm_asm_do_resume(void);
 
+#ifndef CONFIG_NESTED_VIRT
+void asmlinkage nsvm_vcpu_switch(void) { }
+#endif
+
 u32 svm_feature_flags;
 
 /*
@@ -2465,6 +2469,7 @@ static struct hvm_function_table __initdata_cf_clobber svm_function_table = {
     .set_rdtsc_exiting    = svm_set_rdtsc_exiting,
     .get_insn_bytes       = svm_get_insn_bytes,
 
+#ifdef CONFIG_NESTED_VIRT
     .nhvm_vcpu_initialise = nsvm_vcpu_initialise,
     .nhvm_vcpu_destroy = nsvm_vcpu_destroy,
     .nhvm_vcpu_reset = nsvm_vcpu_reset,
@@ -2474,6 +2479,7 @@ static struct hvm_function_table __initdata_cf_clobber svm_function_table = {
     .nhvm_vmcx_hap_enabled = nsvm_vmcb_hap_enabled,
     .nhvm_intr_blocked = nsvm_intr_blocked,
     .nhvm_hap_walk_L1_p2m = nsvm_hap_walk_L1_p2m,
+#endif
 
     .get_reg = svm_get_reg,
     .set_reg = svm_set_reg,
diff --git a/xen/arch/x86/hvm/vmx/Makefile b/xen/arch/x86/hvm/vmx/Makefile
index 04a29ce59d..902564b3e2 100644
--- a/xen/arch/x86/hvm/vmx/Makefile
+++ b/xen/arch/x86/hvm/vmx/Makefile
@@ -3,4 +3,4 @@ obj-y += intr.o
 obj-y += realmode.o
 obj-y += vmcs.o
 obj-y += vmx.o
-obj-y += vvmx.o
+obj-$(CONFIG_NESTED_VIRT) += vvmx.o
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 82c55f49ae..252f27322b 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -55,6 +55,10 @@
 #include <public/hvm/save.h>
 #include <public/sched.h>
 
+#ifndef CONFIG_NESTED_VIRT
+void asmlinkage nvmx_switch_guest(void) { }
+#endif
+
 static bool __initdata opt_force_ept;
 boolean_param("force-ept", opt_force_ept);
 
@@ -2033,7 +2037,7 @@ static void nvmx_enqueue_n2_exceptions(struct vcpu *v,
                  nvmx->intr.intr_info, nvmx->intr.error_code);
 }
 
-static int cf_check nvmx_vmexit_event(
+static int cf_check __maybe_unused nvmx_vmexit_event(
     struct vcpu *v, const struct x86_event *event)
 {
     nvmx_enqueue_n2_exceptions(v, event->vector, event->error_code,
@@ -2933,6 +2937,7 @@ static struct hvm_function_table __initdata_cf_clobber vmx_function_table = {
     .handle_cd            = vmx_handle_cd,
     .set_info_guest       = vmx_set_info_guest,
     .set_rdtsc_exiting    = vmx_set_rdtsc_exiting,
+#ifdef CONFIG_NESTED_VIRT
     .nhvm_vcpu_initialise = nvmx_vcpu_initialise,
     .nhvm_vcpu_destroy    = nvmx_vcpu_destroy,
     .nhvm_vcpu_reset      = nvmx_vcpu_reset,
@@ -2942,8 +2947,9 @@ static struct hvm_function_table __initdata_cf_clobber vmx_function_table = {
     .nhvm_vcpu_vmexit_event = nvmx_vmexit_event,
     .nhvm_intr_blocked    = nvmx_intr_blocked,
     .nhvm_domain_relinquish_resources = nvmx_domain_relinquish_resources,
-    .update_vlapic_mode = vmx_vlapic_msr_changed,
     .nhvm_hap_walk_L1_p2m = nvmx_hap_walk_L1_p2m,
+#endif
+    .update_vlapic_mode = vmx_vlapic_msr_changed,
 #ifdef CONFIG_VM_EVENT
     .enable_msr_interception = vmx_enable_msr_interception,
 #endif
diff --git a/xen/arch/x86/include/asm/hvm/hvm.h b/xen/arch/x86/include/asm/hvm/hvm.h
index 7d9774df59..536a38b450 100644
--- a/xen/arch/x86/include/asm/hvm/hvm.h
+++ b/xen/arch/x86/include/asm/hvm/hvm.h
@@ -711,7 +711,7 @@ static inline bool hvm_altp2m_supported(void)
 /* Returns true if we have the minimum hardware requirements for nested virt */
 static inline bool hvm_nested_virt_supported(void)
 {
-    return hvm_funcs.caps.nested_virt;
+    return IS_ENABLED(CONFIG_NESTED_VIRT) && hvm_funcs.caps.nested_virt;
 }
 
 #ifdef CONFIG_ALTP2M
diff --git a/xen/arch/x86/include/asm/hvm/nestedhvm.h b/xen/arch/x86/include/asm/hvm/nestedhvm.h
index ea2c1bc328..e18d59e0eb 100644
--- a/xen/arch/x86/include/asm/hvm/nestedhvm.h
+++ b/xen/arch/x86/include/asm/hvm/nestedhvm.h
@@ -25,9 +25,21 @@ enum nestedhvm_vmexits {
 /* Nested HVM on/off per domain */
 static inline bool nestedhvm_enabled(const struct domain *d)
 {
-    return IS_ENABLED(CONFIG_HVM) && (d->options & XEN_DOMCTL_CDF_nested_virt);
+    return IS_ENABLED(CONFIG_NESTED_VIRT) &&
+           (d->options & XEN_DOMCTL_CDF_nested_virt);
 }
 
+/* Nested paging */
+#define NESTEDHVM_PAGEFAULT_DONE       0
+#define NESTEDHVM_PAGEFAULT_INJECT     1
+#define NESTEDHVM_PAGEFAULT_L1_ERROR   2
+#define NESTEDHVM_PAGEFAULT_L0_ERROR   3
+#define NESTEDHVM_PAGEFAULT_MMIO       4
+#define NESTEDHVM_PAGEFAULT_RETRY      5
+#define NESTEDHVM_PAGEFAULT_DIRECT_MMIO 6
+
+#ifdef CONFIG_NESTED_VIRT
+
 /* Nested VCPU */
 int nestedhvm_vcpu_initialise(struct vcpu *v);
 void nestedhvm_vcpu_destroy(struct vcpu *v);
@@ -38,14 +50,6 @@ bool nestedhvm_vcpu_in_guestmode(struct vcpu *v);
 #define nestedhvm_vcpu_exit_guestmode(v)  \
     vcpu_nestedhvm(v).nv_guestmode = 0
 
-/* Nested paging */
-#define NESTEDHVM_PAGEFAULT_DONE       0
-#define NESTEDHVM_PAGEFAULT_INJECT     1
-#define NESTEDHVM_PAGEFAULT_L1_ERROR   2
-#define NESTEDHVM_PAGEFAULT_L0_ERROR   3
-#define NESTEDHVM_PAGEFAULT_MMIO       4
-#define NESTEDHVM_PAGEFAULT_RETRY      5
-#define NESTEDHVM_PAGEFAULT_DIRECT_MMIO 6
 int nestedhvm_hap_nested_page_fault(struct vcpu *v, paddr_t *L2_gpa,
                                     struct npfec npfec);
 
@@ -59,6 +63,36 @@ unsigned long *nestedhvm_vcpu_iomap_get(bool ioport_80, bool ioport_ed);
 
 void nestedhvm_vmcx_flushtlb(struct p2m_domain *p2m);
 
+#else /* !CONFIG_NESTED_VIRT */
+
+static inline int nestedhvm_vcpu_initialise(struct vcpu *v)
+{
+    ASSERT_UNREACHABLE();
+    return -EOPNOTSUPP;
+}
+static inline void nestedhvm_vcpu_destroy(struct vcpu *v) { }
+static inline void nestedhvm_vcpu_reset(struct vcpu *v)
+{
+    ASSERT_UNREACHABLE();
+}
+static inline bool nestedhvm_vcpu_in_guestmode(struct vcpu *v) { return false; }
+static inline int nestedhvm_hap_nested_page_fault(struct vcpu *v, paddr_t *L2_gpa,
+                                                  struct npfec npfec)
+{
+    ASSERT_UNREACHABLE();
+    return NESTEDHVM_PAGEFAULT_L0_ERROR;
+}
+#define nestedhvm_vcpu_enter_guestmode(v) do { ASSERT_UNREACHABLE(); } while (0)
+#define nestedhvm_vcpu_exit_guestmode(v)  do { ASSERT_UNREACHABLE(); } while (0)
+#define nestedhvm_paging_mode_hap(v) false
+#define nestedhvm_vmswitch_in_progress(v) false
+static inline void nestedhvm_vmcx_flushtlb(struct p2m_domain *p2m)
+{
+    ASSERT_UNREACHABLE();
+}
+
+#endif /* CONFIG_NESTED_VIRT */
+
 static inline bool nestedhvm_is_n2(struct vcpu *v)
 {
     if ( !nestedhvm_enabled(v->domain) ||
diff --git a/xen/arch/x86/include/asm/hvm/vmx/vvmx.h b/xen/arch/x86/include/asm/hvm/vmx/vvmx.h
index da10d3fa96..ad56cdf01e 100644
--- a/xen/arch/x86/include/asm/hvm/vmx/vvmx.h
+++ b/xen/arch/x86/include/asm/hvm/vmx/vvmx.h
@@ -73,6 +73,8 @@ union vmx_inst_info {
     u32 word;
 };
 
+#ifdef CONFIG_NESTED_VIRT
+
 int cf_check nvmx_vcpu_initialise(struct vcpu *v);
 void cf_check nvmx_vcpu_destroy(struct vcpu *v);
 int cf_check nvmx_vcpu_reset(struct vcpu *v);
@@ -199,5 +201,65 @@ int nept_translate_l2ga(struct vcpu *v, paddr_t l2ga,
                         uint64_t *exit_qual, uint32_t *exit_reason);
 int nvmx_cpu_up_prepare(unsigned int cpu);
 void nvmx_cpu_dead(unsigned int cpu);
+
+#else /* !CONFIG_NESTED_VIRT */
+
+static inline void nvmx_update_exec_control(struct vcpu *v, u32 value)
+{
+    ASSERT_UNREACHABLE();
+}
+static inline void nvmx_update_secondary_exec_control(struct vcpu *v,
+                                                      unsigned long value)
+{
+    ASSERT_UNREACHABLE();
+}
+static inline void nvmx_update_exception_bitmap(struct vcpu *v,
+                                                unsigned long value)
+{
+    ASSERT_UNREACHABLE();
+}
+static inline u64 nvmx_get_tsc_offset(struct vcpu *v)
+{
+    ASSERT_UNREACHABLE();
+    return 0;
+}
+static inline void nvmx_set_cr_read_shadow(struct vcpu *v, unsigned int cr)
+{
+    ASSERT_UNREACHABLE();
+}
+static inline bool nvmx_intercepts_exception(struct vcpu *v, unsigned int vector,
+                                             int error_code)
+{
+    ASSERT_UNREACHABLE();
+    return false;
+}
+static inline int nvmx_n2_vmexit_handler(struct cpu_user_regs *regs,
+                                         unsigned int exit_reason)
+{
+    ASSERT_UNREACHABLE();
+    return 0;
+}
+static inline void nvmx_idtv_handling(void)
+{
+    ASSERT_UNREACHABLE();
+}
+static inline int nvmx_msr_read_intercept(unsigned int msr, u64 *msr_content)
+{
+    ASSERT_UNREACHABLE();
+    return 0;
+}
+static inline int nvmx_handle_vmx_insn(struct cpu_user_regs *regs,
+                                       unsigned int exit_reason)
+{
+    ASSERT_UNREACHABLE();
+    return 0;
+}
+static inline int nvmx_cpu_up_prepare(unsigned int cpu) { return 0; }
+static inline void nvmx_cpu_dead(unsigned int cpu) { }
+
+#define get_vvmcs(vcpu, encoding) 0
+
+#endif /* CONFIG_NESTED_VIRT */
+
 #endif /* __ASM_X86_HVM_VVMX_H__ */
 
diff --git a/xen/arch/x86/mm/Makefile b/xen/arch/x86/mm/Makefile
index 960f6e8409..aa15811c2e 100644
--- a/xen/arch/x86/mm/Makefile
+++ b/xen/arch/x86/mm/Makefile
@@ -7,7 +7,7 @@ obj-$(CONFIG_SHADOW_PAGING) += guest_walk_4.o
 obj-$(CONFIG_VM_EVENT) += mem_access.o
 obj-$(CONFIG_MEM_PAGING) += mem_paging.o
 obj-$(CONFIG_MEM_SHARING) += mem_sharing.o
-obj-$(CONFIG_HVM) += nested.o
+obj-$(CONFIG_NESTED_VIRT) += nested.o
 obj-$(CONFIG_HVM) += p2m.o
 obj-y += p2m-basic.o
 obj-$(CONFIG_INTEL_VMX) += p2m-ept.o
diff --git a/xen/arch/x86/mm/hap/Makefile b/xen/arch/x86/mm/hap/Makefile
index 67c29b2162..2082dfee2d 100644
--- a/xen/arch/x86/mm/hap/Makefile
+++ b/xen/arch/x86/mm/hap/Makefile
@@ -2,5 +2,5 @@ obj-y += hap.o
 obj-y += guest_walk_2.o
 obj-y += guest_walk_3.o
 obj-y += guest_walk_4.o
-obj-y += nested_hap.o
-obj-$(CONFIG_INTEL_VMX) += nested_ept.o
+obj-$(CONFIG_NESTED_VIRT) += nested_hap.o
+obj-$(filter $(CONFIG_NESTED_VIRT),$(CONFIG_INTEL_VMX)) += nested_ept.o
diff --git a/xen/arch/x86/mm/p2m.h b/xen/arch/x86/mm/p2m.h
index 635f5a7f45..63808dddcc 100644
--- a/xen/arch/x86/mm/p2m.h
+++ b/xen/arch/x86/mm/p2m.h
@@ -25,9 +25,15 @@ void p2m_teardown_altp2m(struct domain *d);
 void p2m_flush_table_locked(struct p2m_domain *p2m);
 int __must_check p2m_remove_entry(struct p2m_domain *p2m, gfn_t gfn, mfn_t mfn,
                                   unsigned int page_order);
+#ifdef CONFIG_NESTED_VIRT
 void p2m_nestedp2m_init(struct p2m_domain *p2m);
 int p2m_init_nestedp2m(struct domain *d);
 void p2m_teardown_nestedp2m(struct domain *d);
+#else
+static inline void p2m_nestedp2m_init(struct p2m_domain *p2m) { }
+static inline int p2m_init_nestedp2m(struct domain *d) { return 0; }
+static inline void p2m_teardown_nestedp2m(struct domain *d) { }
+#endif
 
 int ept_p2m_init(struct p2m_domain *p2m);
 void ept_p2m_uninit(struct p2m_domain *p2m);
diff --git a/xen/arch/x86/sysctl.c b/xen/arch/x86/sysctl.c
index 1b04947516..b1d865e1c8 100644
--- a/xen/arch/x86/sysctl.c
+++ b/xen/arch/x86/sysctl.c
@@ -103,6 +103,8 @@ void arch_do_physinfo(struct xen_sysctl_physinfo *pi)
         pi->capabilities |= XEN_SYSCTL_PHYSCAP_hap;
     if ( IS_ENABLED(CONFIG_SHADOW_PAGING) )
         pi->capabilities |= XEN_SYSCTL_PHYSCAP_shadow;
+    if ( hvm_nested_virt_supported() )
+        pi->capabilities |= XEN_SYSCTL_PHYSCAP_nestedhvm;
 }
 
 long arch_do_sysctl(
diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h
index 66c9b65465..b4bd1dd7b2 100644
--- a/xen/include/public/sysctl.h
+++ b/xen/include/public/sysctl.h
@@ -100,9 +100,11 @@ struct xen_sysctl_tbuf_op {
 /* Xen supports the Grant v1 and/or v2 ABIs. */
 #define XEN_SYSCTL_PHYSCAP_gnttab_v1     (1u << 8)
 #define XEN_SYSCTL_PHYSCAP_gnttab_v2     (1u << 9)
+/* The platform supports nested HVM. */
+#define XEN_SYSCTL_PHYSCAP_nestedhvm     (1u << 10)
 
 /* Max XEN_SYSCTL_PHYSCAP_* constant.  Used for ABI checking. */
-#define XEN_SYSCTL_PHYSCAP_MAX XEN_SYSCTL_PHYSCAP_gnttab_v2
+#define XEN_SYSCTL_PHYSCAP_MAX XEN_SYSCTL_PHYSCAP_nestedhvm
 
 #if defined(__arm__) || defined(__aarch64__)
 #define XEN_SYSCTL_PHYSCAP_ARM_SVE_MASK  (0x1FU)
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] x86/hvm: Add Kconfig option to disable nested virtualization
  2026-02-06 21:05 [PATCH v2] x86/hvm: Add Kconfig option to disable nested virtualization Stefano Stabellini
@ 2026-02-08  2:31 ` Demi Marie Obenour
  2026-02-09 11:08 ` Roger Pau Monné
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 10+ messages in thread
From: Demi Marie Obenour @ 2026-02-08  2:31 UTC (permalink / raw)
  To: Stefano Stabellini, xen-devel
  Cc: roger.pau, jbeulich, andrew.cooper3, jason.andryuk,
	alejandro.garciavallejo


[-- Attachment #1.1.1: Type: text/plain, Size: 1031 bytes --]

On 2/6/26 16:05, Stefano Stabellini wrote:
> Introduce CONFIG_NESTED_VIRT (default y, requires EXPERT to disable)
> to allow nested virtualization support to be disabled at build time.
> This is useful for embedded or safety-focused deployments where
> nested virtualization is not needed, reducing code size and attack
> surface.
> 
> When CONFIG_NESTED_VIRT=n, the following source files are excluded:
> - arch/x86/hvm/nestedhvm.c
> - arch/x86/hvm/svm/nestedsvm.c
> - arch/x86/hvm/vmx/vvmx.c
> - arch/x86/mm/nested.c
> - arch/x86/mm/hap/nested_hap.c
> - arch/x86/mm/hap/nested_ept.c
> 
> Add inline stubs where needed in headers.
> 
> No functional change when CONFIG_NESTED_VIRT=y.
> 
> Signed-off-by: Stefano Stabellini <stefano.stabellini@amd.com>

Commit message is stale: the default is now N and the dependency on
EXPERT was removed.

Personally, I would add a dependency on UNSUPPORTED, since it is
known to not work well and should not be used.
-- 
Sincerely,
Demi Marie Obenour (she/her/hers)

[-- Attachment #1.1.2: OpenPGP public key --]
[-- Type: application/pgp-keys, Size: 7253 bytes --]

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] x86/hvm: Add Kconfig option to disable nested virtualization
  2026-02-06 21:05 [PATCH v2] x86/hvm: Add Kconfig option to disable nested virtualization Stefano Stabellini
  2026-02-08  2:31 ` Demi Marie Obenour
@ 2026-02-09 11:08 ` Roger Pau Monné
  2026-02-13 21:56   ` Stefano Stabellini
  2026-02-09 14:55 ` Alejandro Vallejo
  2026-02-09 15:09 ` Jan Beulich
  3 siblings, 1 reply; 10+ messages in thread
From: Roger Pau Monné @ 2026-02-09 11:08 UTC (permalink / raw)
  To: Stefano Stabellini
  Cc: xen-devel, jbeulich, andrew.cooper3, jason.andryuk,
	alejandro.garciavallejo

On Fri, Feb 06, 2026 at 01:05:54PM -0800, Stefano Stabellini wrote:
> Introduce CONFIG_NESTED_VIRT (default y, requires EXPERT to disable)
> to allow nested virtualization support to be disabled at build time.
> This is useful for embedded or safety-focused deployments where
> nested virtualization is not needed, reducing code size and attack
> surface.
> 
> When CONFIG_NESTED_VIRT=n, the following source files are excluded:
> - arch/x86/hvm/nestedhvm.c
> - arch/x86/hvm/svm/nestedsvm.c
> - arch/x86/hvm/vmx/vvmx.c
> - arch/x86/mm/nested.c
> - arch/x86/mm/hap/nested_hap.c
> - arch/x86/mm/hap/nested_ept.c
> 
> Add inline stubs where needed in headers.
> 
> No functional change when CONFIG_NESTED_VIRT=y.
> 
> Signed-off-by: Stefano Stabellini <stefano.stabellini@amd.com>
> ---
> Changes in v2:
> - add ASSERT_UNREACHABLE
> - change default to N and remove EXPERT
> - don't compile nested_hap if not VMX
> - add XEN_SYSCTL_PHYSCAP_nestedhvm
> - add IS_ENABLED check in hvm_nested_virt_supported 
> ---
>  xen/arch/x86/hvm/Kconfig                 | 10 ++++
>  xen/arch/x86/hvm/Makefile                |  2 +-
>  xen/arch/x86/hvm/svm/Makefile            |  2 +-
>  xen/arch/x86/hvm/svm/nestedhvm.h         | 60 +++++++++++++++++++++--
>  xen/arch/x86/hvm/svm/svm.c               |  6 +++
>  xen/arch/x86/hvm/vmx/Makefile            |  2 +-
>  xen/arch/x86/hvm/vmx/vmx.c               | 10 +++-
>  xen/arch/x86/include/asm/hvm/hvm.h       |  2 +-
>  xen/arch/x86/include/asm/hvm/nestedhvm.h | 52 ++++++++++++++++----
>  xen/arch/x86/include/asm/hvm/vmx/vvmx.h  | 62 ++++++++++++++++++++++++
>  xen/arch/x86/mm/Makefile                 |  2 +-
>  xen/arch/x86/mm/hap/Makefile             |  4 +-
>  xen/arch/x86/mm/p2m.h                    |  6 +++
>  xen/arch/x86/sysctl.c                    |  2 +
>  xen/include/public/sysctl.h              |  4 +-
>  15 files changed, 202 insertions(+), 24 deletions(-)
> 
> diff --git a/xen/arch/x86/hvm/Kconfig b/xen/arch/x86/hvm/Kconfig
> index f32bf5cbb7..133f19a063 100644
> --- a/xen/arch/x86/hvm/Kconfig
> +++ b/xen/arch/x86/hvm/Kconfig
> @@ -92,4 +92,14 @@ config MEM_SHARING
>  	bool "Xen memory sharing support (UNSUPPORTED)" if UNSUPPORTED
>  	depends on INTEL_VMX
>  
> +config NESTED_VIRT
> +	bool "Nested virtualization support"
> +	depends on AMD_SVM || INTEL_VMX
> +	default n
> +	help
> +	  Enable nested virtualization, allowing guests to run their own
> +	  hypervisors. This requires hardware support.
> +
> +	  If unsure, say N.
> +
>  endif
> diff --git a/xen/arch/x86/hvm/Makefile b/xen/arch/x86/hvm/Makefile
> index f34fb03934..b8a0a68624 100644
> --- a/xen/arch/x86/hvm/Makefile
> +++ b/xen/arch/x86/hvm/Makefile
> @@ -18,7 +18,7 @@ obj-y += irq.o
>  obj-y += mmio.o
>  obj-$(CONFIG_VM_EVENT) += monitor.o
>  obj-y += mtrr.o
> -obj-y += nestedhvm.o
> +obj-$(CONFIG_NESTED_VIRT) += nestedhvm.o
>  obj-y += pmtimer.o
>  obj-y += quirks.o
>  obj-y += rtc.o
> diff --git a/xen/arch/x86/hvm/svm/Makefile b/xen/arch/x86/hvm/svm/Makefile
> index 8a072cafd5..92418e3444 100644
> --- a/xen/arch/x86/hvm/svm/Makefile
> +++ b/xen/arch/x86/hvm/svm/Makefile
> @@ -2,6 +2,6 @@ obj-y += asid.o
>  obj-y += emulate.o
>  obj-bin-y += entry.o
>  obj-y += intr.o
> -obj-y += nestedsvm.o
> +obj-$(CONFIG_NESTED_VIRT) += nestedsvm.o
>  obj-y += svm.o
>  obj-y += vmcb.o
> diff --git a/xen/arch/x86/hvm/svm/nestedhvm.h b/xen/arch/x86/hvm/svm/nestedhvm.h
> index 9bfed5ffd7..ed1aa847e5 100644
> --- a/xen/arch/x86/hvm/svm/nestedhvm.h
> +++ b/xen/arch/x86/hvm/svm/nestedhvm.h
> @@ -26,6 +26,13 @@
>  #define nsvm_efer_svm_enabled(v) \
>      (!!((v)->arch.hvm.guest_efer & EFER_SVME))

I wonder whether you also want to force this one to return false if
NETSED_VIRT is disabled.  That would likely lead to more DCE by the
compiler:

#define nsvm_efer_svm_enabled(v) \
    (IS_ENABLED(CONFIG_NESTED_VIRT) &&
     !!((v)->arch.hvm.guest_efer & EFER_SVME))

>  
> +#define NSVM_INTR_NOTHANDLED     3
> +#define NSVM_INTR_NOTINTERCEPTED 2
> +#define NSVM_INTR_FORCEVMEXIT    1
> +#define NSVM_INTR_MASKED         0
> +
> +#ifdef CONFIG_NESTED_VIRT
> +
>  int nestedsvm_vmcb_map(struct vcpu *v, uint64_t vmcbaddr);
>  void nestedsvm_vmexit_defer(struct vcpu *v,
>      uint64_t exitcode, uint64_t exitinfo1, uint64_t exitinfo2);
> @@ -57,13 +64,56 @@ int cf_check nsvm_hap_walk_L1_p2m(
>      struct vcpu *v, paddr_t L2_gpa, paddr_t *L1_gpa, unsigned int *page_order,
>      uint8_t *p2m_acc, struct npfec npfec);
>  
> -#define NSVM_INTR_NOTHANDLED     3
> -#define NSVM_INTR_NOTINTERCEPTED 2
> -#define NSVM_INTR_FORCEVMEXIT    1
> -#define NSVM_INTR_MASKED         0
> -
>  int nestedsvm_vcpu_interrupt(struct vcpu *v, const struct hvm_intack intack);
>  
> +#else /* !CONFIG_NESTED_VIRT */
> +
> +static inline int nestedsvm_vmcb_map(struct vcpu *v, uint64_t vmcbaddr)
> +{
> +    ASSERT_UNREACHABLE();
> +    return -EOPNOTSUPP;

I think this is guest-reachable code even when nested virt is build
time disabled:

VMEXIT_VMRUN -> svm_vmexit_do_vmrun() -> nestedsvm_vmcb_map().

I think you rely on nsvm_efer_svm_enabled() always returning false, as
all calls to nestedsvm_vmcb_map() are gated on nsvm_efer_svm_enabled()
returning true.

> +}
> +static inline void nestedsvm_vmexit_defer(struct vcpu *v,
> +    uint64_t exitcode, uint64_t exitinfo1, uint64_t exitinfo2)
> +{
> +    ASSERT_UNREACHABLE();
> +}
> +static inline enum nestedhvm_vmexits nestedsvm_vmexit_n2n1(struct vcpu *v,
> +    struct cpu_user_regs *regs)
> +{
> +    ASSERT_UNREACHABLE();
> +    return NESTEDHVM_VMEXIT_ERROR;
> +}
> +static inline enum nestedhvm_vmexits nestedsvm_check_intercepts(struct vcpu *v,
> +    struct cpu_user_regs *regs, uint64_t exitcode)
> +{
> +    ASSERT_UNREACHABLE();
> +    return NESTEDHVM_VMEXIT_ERROR;

For the above two you possibly want to return
NESTEDHVM_VMEXIT_FATALERROR so that the L1 guest is killed.  This
should be unreachable code when nested virt is build disabled, and
hence it's safer to kill the guest, rather than attempting to report
an error to L1.

> +}
> +static inline void svm_nested_features_on_efer_update(struct vcpu *v)
> +{
> +    ASSERT_UNREACHABLE();
> +}
> +static inline void svm_vmexit_do_clgi(struct cpu_user_regs *regs,
> +                                      struct vcpu *v)
> +{
> +    ASSERT_UNREACHABLE();
> +}
> +static inline void svm_vmexit_do_stgi(struct cpu_user_regs *regs,
> +                                       struct vcpu *v)
> +{
> +    ASSERT_UNREACHABLE();
> +}

Aren't those last two guest reachable if it tries to execute an STGI
or CLGI instruction?  I see the svm exit handler calling straight into
svm_vmexit_do_{st,cl}gi() as a result of VMEXIT_{ST,CL}GI vmexit.


> +static inline bool nestedsvm_gif_isset(struct vcpu *v) { return true; }

I think this one above is really unreachable when `nestedsvm.c` is not
build, and hence you could even remove the handler here.

> +static inline int nestedsvm_vcpu_interrupt(struct vcpu *v,
> +                                           const struct hvm_intack intack)
> +{
> +    ASSERT_UNREACHABLE();
> +    return NSVM_INTR_NOTINTERCEPTED;
> +}
> +
> +#endif /* CONFIG_NESTED_VIRT */
> +
>  #endif /* __X86_HVM_SVM_NESTEDHVM_PRIV_H__ */
>  
>  /*
> diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
> index 18ba837738..0234b57afb 100644
> --- a/xen/arch/x86/hvm/svm/svm.c
> +++ b/xen/arch/x86/hvm/svm/svm.c
> @@ -46,6 +46,10 @@
>  
>  void noreturn svm_asm_do_resume(void);
>  
> +#ifndef CONFIG_NESTED_VIRT
> +void asmlinkage nsvm_vcpu_switch(void) { }
> +#endif

Instead of adding the ifdefs here, you could maybe add them to
svm/entry.S and elide the call to nsvm_vcpu_switch() altogether?

> +
>  u32 svm_feature_flags;
>  
>  /*
> @@ -2465,6 +2469,7 @@ static struct hvm_function_table __initdata_cf_clobber svm_function_table = {
>      .set_rdtsc_exiting    = svm_set_rdtsc_exiting,
>      .get_insn_bytes       = svm_get_insn_bytes,
>  
> +#ifdef CONFIG_NESTED_VIRT
>      .nhvm_vcpu_initialise = nsvm_vcpu_initialise,
>      .nhvm_vcpu_destroy = nsvm_vcpu_destroy,
>      .nhvm_vcpu_reset = nsvm_vcpu_reset,
> @@ -2474,6 +2479,7 @@ static struct hvm_function_table __initdata_cf_clobber svm_function_table = {
>      .nhvm_vmcx_hap_enabled = nsvm_vmcb_hap_enabled,
>      .nhvm_intr_blocked = nsvm_intr_blocked,
>      .nhvm_hap_walk_L1_p2m = nsvm_hap_walk_L1_p2m,
> +#endif
>  
>      .get_reg = svm_get_reg,
>      .set_reg = svm_set_reg,
> diff --git a/xen/arch/x86/hvm/vmx/Makefile b/xen/arch/x86/hvm/vmx/Makefile
> index 04a29ce59d..902564b3e2 100644
> --- a/xen/arch/x86/hvm/vmx/Makefile
> +++ b/xen/arch/x86/hvm/vmx/Makefile
> @@ -3,4 +3,4 @@ obj-y += intr.o
>  obj-y += realmode.o
>  obj-y += vmcs.o
>  obj-y += vmx.o
> -obj-y += vvmx.o
> +obj-$(CONFIG_NESTED_VIRT) += vvmx.o
> diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
> index 82c55f49ae..252f27322b 100644
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -55,6 +55,10 @@
>  #include <public/hvm/save.h>
>  #include <public/sched.h>
>  
> +#ifndef CONFIG_NESTED_VIRT
> +void asmlinkage nvmx_switch_guest(void) { }
> +#endif

Similar to my remark above, you could likely move the ifdef to the
assembly call site.

>  static bool __initdata opt_force_ept;
>  boolean_param("force-ept", opt_force_ept);
>  
> @@ -2033,7 +2037,7 @@ static void nvmx_enqueue_n2_exceptions(struct vcpu *v,
>                   nvmx->intr.intr_info, nvmx->intr.error_code);
>  }
>  
> -static int cf_check nvmx_vmexit_event(
> +static int cf_check __maybe_unused nvmx_vmexit_event(

Maybe you can move this one to the vvmx.c file?  It seems like an odd
one to have defined in vmx.c.

>      struct vcpu *v, const struct x86_event *event)
>  {
>      nvmx_enqueue_n2_exceptions(v, event->vector, event->error_code,
> @@ -2933,6 +2937,7 @@ static struct hvm_function_table __initdata_cf_clobber vmx_function_table = {
>      .handle_cd            = vmx_handle_cd,
>      .set_info_guest       = vmx_set_info_guest,
>      .set_rdtsc_exiting    = vmx_set_rdtsc_exiting,
> +#ifdef CONFIG_NESTED_VIRT
>      .nhvm_vcpu_initialise = nvmx_vcpu_initialise,
>      .nhvm_vcpu_destroy    = nvmx_vcpu_destroy,
>      .nhvm_vcpu_reset      = nvmx_vcpu_reset,
> @@ -2942,8 +2947,9 @@ static struct hvm_function_table __initdata_cf_clobber vmx_function_table = {
>      .nhvm_vcpu_vmexit_event = nvmx_vmexit_event,
>      .nhvm_intr_blocked    = nvmx_intr_blocked,
>      .nhvm_domain_relinquish_resources = nvmx_domain_relinquish_resources,
> -    .update_vlapic_mode = vmx_vlapic_msr_changed,
>      .nhvm_hap_walk_L1_p2m = nvmx_hap_walk_L1_p2m,
> +#endif
> +    .update_vlapic_mode = vmx_vlapic_msr_changed,
>  #ifdef CONFIG_VM_EVENT
>      .enable_msr_interception = vmx_enable_msr_interception,
>  #endif
> diff --git a/xen/arch/x86/include/asm/hvm/hvm.h b/xen/arch/x86/include/asm/hvm/hvm.h
> index 7d9774df59..536a38b450 100644
> --- a/xen/arch/x86/include/asm/hvm/hvm.h
> +++ b/xen/arch/x86/include/asm/hvm/hvm.h
> @@ -711,7 +711,7 @@ static inline bool hvm_altp2m_supported(void)
>  /* Returns true if we have the minimum hardware requirements for nested virt */
>  static inline bool hvm_nested_virt_supported(void)
>  {
> -    return hvm_funcs.caps.nested_virt;
> +    return IS_ENABLED(CONFIG_NESTED_VIRT) && hvm_funcs.caps.nested_virt;
>  }
>  
>  #ifdef CONFIG_ALTP2M
> diff --git a/xen/arch/x86/include/asm/hvm/nestedhvm.h b/xen/arch/x86/include/asm/hvm/nestedhvm.h
> index ea2c1bc328..e18d59e0eb 100644
> --- a/xen/arch/x86/include/asm/hvm/nestedhvm.h
> +++ b/xen/arch/x86/include/asm/hvm/nestedhvm.h
> @@ -25,9 +25,21 @@ enum nestedhvm_vmexits {
>  /* Nested HVM on/off per domain */
>  static inline bool nestedhvm_enabled(const struct domain *d)
>  {
> -    return IS_ENABLED(CONFIG_HVM) && (d->options & XEN_DOMCTL_CDF_nested_virt);
> +    return IS_ENABLED(CONFIG_NESTED_VIRT) &&
> +           (d->options & XEN_DOMCTL_CDF_nested_virt);
>  }
>  
> +/* Nested paging */
> +#define NESTEDHVM_PAGEFAULT_DONE       0
> +#define NESTEDHVM_PAGEFAULT_INJECT     1
> +#define NESTEDHVM_PAGEFAULT_L1_ERROR   2
> +#define NESTEDHVM_PAGEFAULT_L0_ERROR   3
> +#define NESTEDHVM_PAGEFAULT_MMIO       4
> +#define NESTEDHVM_PAGEFAULT_RETRY      5
> +#define NESTEDHVM_PAGEFAULT_DIRECT_MMIO 6
> +
> +#ifdef CONFIG_NESTED_VIRT
> +
>  /* Nested VCPU */
>  int nestedhvm_vcpu_initialise(struct vcpu *v);
>  void nestedhvm_vcpu_destroy(struct vcpu *v);
> @@ -38,14 +50,6 @@ bool nestedhvm_vcpu_in_guestmode(struct vcpu *v);
>  #define nestedhvm_vcpu_exit_guestmode(v)  \
>      vcpu_nestedhvm(v).nv_guestmode = 0
>  
> -/* Nested paging */
> -#define NESTEDHVM_PAGEFAULT_DONE       0
> -#define NESTEDHVM_PAGEFAULT_INJECT     1
> -#define NESTEDHVM_PAGEFAULT_L1_ERROR   2
> -#define NESTEDHVM_PAGEFAULT_L0_ERROR   3
> -#define NESTEDHVM_PAGEFAULT_MMIO       4
> -#define NESTEDHVM_PAGEFAULT_RETRY      5
> -#define NESTEDHVM_PAGEFAULT_DIRECT_MMIO 6
>  int nestedhvm_hap_nested_page_fault(struct vcpu *v, paddr_t *L2_gpa,
>                                      struct npfec npfec);
>  
> @@ -59,6 +63,36 @@ unsigned long *nestedhvm_vcpu_iomap_get(bool ioport_80, bool ioport_ed);
>  
>  void nestedhvm_vmcx_flushtlb(struct p2m_domain *p2m);
>  
> +#else /* !CONFIG_NESTED_VIRT */
> +
> +static inline int nestedhvm_vcpu_initialise(struct vcpu *v)
> +{
> +    ASSERT_UNREACHABLE();
> +    return -EOPNOTSUPP;
> +}
> +static inline void nestedhvm_vcpu_destroy(struct vcpu *v) { }
> +static inline void nestedhvm_vcpu_reset(struct vcpu *v)
> +{
> +    ASSERT_UNREACHABLE();
> +}
> +static inline bool nestedhvm_vcpu_in_guestmode(struct vcpu *v) { return false; }
> +static inline int nestedhvm_hap_nested_page_fault(struct vcpu *v, paddr_t *L2_gpa,
> +                                                  struct npfec npfec)
> +{
> +    ASSERT_UNREACHABLE();
> +    return NESTEDHVM_PAGEFAULT_L0_ERROR;
> +}
> +#define nestedhvm_vcpu_enter_guestmode(v) do { ASSERT_UNREACHABLE(); } while (0)
> +#define nestedhvm_vcpu_exit_guestmode(v)  do { ASSERT_UNREACHABLE(); } while (0)
> +#define nestedhvm_paging_mode_hap(v) false
> +#define nestedhvm_vmswitch_in_progress(v) false

Why are those defines instead of static inlines?

> +static inline void nestedhvm_vmcx_flushtlb(struct p2m_domain *p2m)
> +{
> +    ASSERT_UNREACHABLE();
> +}
> +
> +#endif /* CONFIG_NESTED_VIRT */
> +
>  static inline bool nestedhvm_is_n2(struct vcpu *v)
>  {
>      if ( !nestedhvm_enabled(v->domain) ||
> diff --git a/xen/arch/x86/include/asm/hvm/vmx/vvmx.h b/xen/arch/x86/include/asm/hvm/vmx/vvmx.h
> index da10d3fa96..ad56cdf01e 100644
> --- a/xen/arch/x86/include/asm/hvm/vmx/vvmx.h
> +++ b/xen/arch/x86/include/asm/hvm/vmx/vvmx.h
> @@ -73,6 +73,8 @@ union vmx_inst_info {
>      u32 word;
>  };
>  
> +#ifdef CONFIG_NESTED_VIRT
> +
>  int cf_check nvmx_vcpu_initialise(struct vcpu *v);
>  void cf_check nvmx_vcpu_destroy(struct vcpu *v);
>  int cf_check nvmx_vcpu_reset(struct vcpu *v);
> @@ -199,5 +201,65 @@ int nept_translate_l2ga(struct vcpu *v, paddr_t l2ga,
>                          uint64_t *exit_qual, uint32_t *exit_reason);
>  int nvmx_cpu_up_prepare(unsigned int cpu);
>  void nvmx_cpu_dead(unsigned int cpu);
> +
> +#else /* !CONFIG_NESTED_VIRT */
> +
> +static inline void nvmx_update_exec_control(struct vcpu *v, u32 value)
> +{
> +    ASSERT_UNREACHABLE();
> +}
> +static inline void nvmx_update_secondary_exec_control(struct vcpu *v,
> +                                                      unsigned long value)
> +{
> +    ASSERT_UNREACHABLE();
> +}
> +static inline void nvmx_update_exception_bitmap(struct vcpu *v,
> +                                                unsigned long value)
> +{
> +    ASSERT_UNREACHABLE();
> +}
> +static inline u64 nvmx_get_tsc_offset(struct vcpu *v)
> +{
> +    ASSERT_UNREACHABLE();
> +    return 0;
> +}
> +static inline void nvmx_set_cr_read_shadow(struct vcpu *v, unsigned int cr)
> +{
> +    ASSERT_UNREACHABLE();
> +}
> +static inline bool nvmx_intercepts_exception(struct vcpu *v, unsigned int vector,
> +                                             int error_code)
> +{
> +    ASSERT_UNREACHABLE();
> +    return false;
> +}
> +static inline int nvmx_n2_vmexit_handler(struct cpu_user_regs *regs,
> +                                         unsigned int exit_reason)
> +{
> +    ASSERT_UNREACHABLE();
> +    return 0;
> +}
> +static inline void nvmx_idtv_handling(void)
> +{
> +    ASSERT_UNREACHABLE();
> +}
> +static inline int nvmx_msr_read_intercept(unsigned int msr, u64 *msr_content)
> +{
> +    ASSERT_UNREACHABLE();
> +    return 0;
> +}

I think this function is reachable even when nested virt is not
enabled:

vmx_msr_read_intercept() -> case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_VMFUNC -> nvmx_msr_read_intercept()

I'm also confused about why the function returns 0 instead of an error
when !nestedhvm_enabled().  We should probably make it return -ENODEV
when nested virt is not available or enabled.

> +static inline int nvmx_handle_vmx_insn(struct cpu_user_regs *regs,
> +                                       unsigned int exit_reason)
> +{
> +    ASSERT_UNREACHABLE();
> +    return 0;
> +}

Same here, I think this is likely reachable from vmx_vmexit_handler(),
and shouldn't assert?

It should also do something like:

    hvm_inject_hw_exception(X86_EXC_UD, X86_EVENT_NO_EC);
    return X86EMUL_EXCEPTION;

So it mimics what the function itself does when !nestedhvm_enabled().

> +static inline int nvmx_cpu_up_prepare(unsigned int cpu) { return 0; }
> +static inline void nvmx_cpu_dead(unsigned int cpu) { }
> +
> +#define get_vvmcs(vcpu, encoding) 0

This likely wants to be a static inline and have an
ASSERT_UNREACHABLE()?  From a quick look it seems like call sites do
check that the vpcu is in guest mode before attempting to fetch a
field from the nested VMCS.

Thanks, Roger.


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] x86/hvm: Add Kconfig option to disable nested virtualization
  2026-02-06 21:05 [PATCH v2] x86/hvm: Add Kconfig option to disable nested virtualization Stefano Stabellini
  2026-02-08  2:31 ` Demi Marie Obenour
  2026-02-09 11:08 ` Roger Pau Monné
@ 2026-02-09 14:55 ` Alejandro Vallejo
  2026-02-09 15:01   ` Jan Beulich
  2026-02-09 15:09 ` Jan Beulich
  3 siblings, 1 reply; 10+ messages in thread
From: Alejandro Vallejo @ 2026-02-09 14:55 UTC (permalink / raw)
  To: Stefano Stabellini, xen-devel
  Cc: roger.pau, jbeulich, andrew.cooper3, jason.andryuk

On Fri Feb 6, 2026 at 10:05 PM CET, Stefano Stabellini wrote:
> Introduce CONFIG_NESTED_VIRT (default y, requires EXPERT to disable)
> to allow nested virtualization support to be disabled at build time.
> This is useful for embedded or safety-focused deployments where
> nested virtualization is not needed, reducing code size and attack
> surface.
>
> When CONFIG_NESTED_VIRT=n, the following source files are excluded:
> - arch/x86/hvm/nestedhvm.c
> - arch/x86/hvm/svm/nestedsvm.c
> - arch/x86/hvm/vmx/vvmx.c
> - arch/x86/mm/nested.c
> - arch/x86/mm/hap/nested_hap.c
> - arch/x86/mm/hap/nested_ept.c

Good riddance.

>
> Add inline stubs where needed in headers.

I'm fairly sure we can get rid much of this diff by curring off earlier.

>
> No functional change when CONFIG_NESTED_VIRT=y.
>
> Signed-off-by: Stefano Stabellini <stefano.stabellini@amd.com>
> ---
> Changes in v2:
> - add ASSERT_UNREACHABLE
> - change default to N and remove EXPERT
> - don't compile nested_hap if not VMX
> - add XEN_SYSCTL_PHYSCAP_nestedhvm
> - add IS_ENABLED check in hvm_nested_virt_supported 
> ---
>  xen/arch/x86/hvm/Kconfig                 | 10 ++++
>  xen/arch/x86/hvm/Makefile                |  2 +-
>  xen/arch/x86/hvm/svm/Makefile            |  2 +-
>  xen/arch/x86/hvm/svm/nestedhvm.h         | 60 +++++++++++++++++++++--
>  xen/arch/x86/hvm/svm/svm.c               |  6 +++
>  xen/arch/x86/hvm/vmx/Makefile            |  2 +-
>  xen/arch/x86/hvm/vmx/vmx.c               | 10 +++-
>  xen/arch/x86/include/asm/hvm/hvm.h       |  2 +-
>  xen/arch/x86/include/asm/hvm/nestedhvm.h | 52 ++++++++++++++++----
>  xen/arch/x86/include/asm/hvm/vmx/vvmx.h  | 62 ++++++++++++++++++++++++
>  xen/arch/x86/mm/Makefile                 |  2 +-
>  xen/arch/x86/mm/hap/Makefile             |  4 +-
>  xen/arch/x86/mm/p2m.h                    |  6 +++
>  xen/arch/x86/sysctl.c                    |  2 +
>  xen/include/public/sysctl.h              |  4 +-
>  15 files changed, 202 insertions(+), 24 deletions(-)
>
> diff --git a/xen/arch/x86/hvm/Kconfig b/xen/arch/x86/hvm/Kconfig
> index f32bf5cbb7..133f19a063 100644
> --- a/xen/arch/x86/hvm/Kconfig
> +++ b/xen/arch/x86/hvm/Kconfig
> @@ -92,4 +92,14 @@ config MEM_SHARING
>  	bool "Xen memory sharing support (UNSUPPORTED)" if UNSUPPORTED
>  	depends on INTEL_VMX
>  
> +config NESTED_VIRT
> +	bool "Nested virtualization support"
> +	depends on AMD_SVM || INTEL_VMX
> +	default n
> +	help
> +	  Enable nested virtualization, allowing guests to run their own
> +	  hypervisors. This requires hardware support.
> +
> +	  If unsure, say N.
> +
>  endif
> diff --git a/xen/arch/x86/hvm/Makefile b/xen/arch/x86/hvm/Makefile
> index f34fb03934..b8a0a68624 100644
> --- a/xen/arch/x86/hvm/Makefile
> +++ b/xen/arch/x86/hvm/Makefile
> @@ -18,7 +18,7 @@ obj-y += irq.o
>  obj-y += mmio.o
>  obj-$(CONFIG_VM_EVENT) += monitor.o
>  obj-y += mtrr.o
> -obj-y += nestedhvm.o
> +obj-$(CONFIG_NESTED_VIRT) += nestedhvm.o
>  obj-y += pmtimer.o
>  obj-y += quirks.o
>  obj-y += rtc.o
> diff --git a/xen/arch/x86/hvm/svm/Makefile b/xen/arch/x86/hvm/svm/Makefile
> index 8a072cafd5..92418e3444 100644
> --- a/xen/arch/x86/hvm/svm/Makefile
> +++ b/xen/arch/x86/hvm/svm/Makefile
> @@ -2,6 +2,6 @@ obj-y += asid.o
>  obj-y += emulate.o
>  obj-bin-y += entry.o
>  obj-y += intr.o
> -obj-y += nestedsvm.o
> +obj-$(CONFIG_NESTED_VIRT) += nestedsvm.o
>  obj-y += svm.o
>  obj-y += vmcb.o
> diff --git a/xen/arch/x86/hvm/svm/nestedhvm.h b/xen/arch/x86/hvm/svm/nestedhvm.h
> index 9bfed5ffd7..ed1aa847e5 100644
> --- a/xen/arch/x86/hvm/svm/nestedhvm.h
> +++ b/xen/arch/x86/hvm/svm/nestedhvm.h
> @@ -26,6 +26,13 @@
>  #define nsvm_efer_svm_enabled(v) \
>      (!!((v)->arch.hvm.guest_efer & EFER_SVME))

This should be IS_ENABLED(CONFIG_NESTED_VIRT) && (!!((v)->arch.hvm.guest_efer & EFER_SVME))

Then DCE removes all calls to the stubs you had to add, making the stubs
redundant. the stgi and clgi handlers need new helpers like those of VMRUN, VMLOAD and
VMSAVE so that svm.c can dispatch #UD to the guest rather than relying on nestedsvm.c
doing so. Afterwards you endup with a cleaner nestedhvm.h with no stubs and
all logic removed by DCE.

>  
>  /*
> diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
> index 18ba837738..0234b57afb 100644
> --- a/xen/arch/x86/hvm/svm/svm.c
> +++ b/xen/arch/x86/hvm/svm/svm.c
> @@ -46,6 +46,10 @@
>  
>  void noreturn svm_asm_do_resume(void);
>  
> +#ifndef CONFIG_NESTED_VIRT
> +void asmlinkage nsvm_vcpu_switch(void) { }
> +#endif

Rather than doing this, why not compile out the calls in entry.S?

While at it, there's the hunk about p2m stealing we can also get rid of:


	        cmp  %cl,VCPU_nsvm_hap_enabled(%rbx)
	UNLIKELY_START(ne, nsvm_hap)
	        cmp  %rcx,VCPU_nhvm_p2m(%rbx)
	        sete %al
	        test VCPU_nhvm_guestmode(%rbx),%al
	        UNLIKELY_DONE(z, nsvm_hap)
	        /*
	         * Someone shot down our nested p2m table; go round again
	         * and nsvm_vcpu_switch() will fix it for us.
	         */
	        sti
	        jmp  .Lsvm_do_resume
	__UNLIKELY_END(nsvm_hap)


> +
>  u32 svm_feature_flags;
>  
>  /*
> @@ -2465,6 +2469,7 @@ static struct hvm_function_table __initdata_cf_clobber svm_function_table = {
>      .set_rdtsc_exiting    = svm_set_rdtsc_exiting,
>      .get_insn_bytes       = svm_get_insn_bytes,
>  
> +#ifdef CONFIG_NESTED_VIRT
>      .nhvm_vcpu_initialise = nsvm_vcpu_initialise,
>      .nhvm_vcpu_destroy = nsvm_vcpu_destroy,
>      .nhvm_vcpu_reset = nsvm_vcpu_reset,
> @@ -2474,6 +2479,7 @@ static struct hvm_function_table __initdata_cf_clobber svm_function_table = {
>      .nhvm_vmcx_hap_enabled = nsvm_vmcb_hap_enabled,
>      .nhvm_intr_blocked = nsvm_intr_blocked,
>      .nhvm_hap_walk_L1_p2m = nsvm_hap_walk_L1_p2m,
> +#endif
>  
>      .get_reg = svm_get_reg,
>      .set_reg = svm_set_reg,
> diff --git a/xen/arch/x86/hvm/vmx/Makefile b/xen/arch/x86/hvm/vmx/Makefile
> index 04a29ce59d..902564b3e2 100644
> --- a/xen/arch/x86/hvm/vmx/Makefile
> +++ b/xen/arch/x86/hvm/vmx/Makefile
> @@ -3,4 +3,4 @@ obj-y += intr.o
>  obj-y += realmode.o
>  obj-y += vmcs.o
>  obj-y += vmx.o
> -obj-y += vvmx.o
> +obj-$(CONFIG_NESTED_VIRT) += vvmx.o
> diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
> index 82c55f49ae..252f27322b 100644
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -55,6 +55,10 @@
>  #include <public/hvm/save.h>
>  #include <public/sched.h>
>  
> +#ifndef CONFIG_NESTED_VIRT
> +void asmlinkage nvmx_switch_guest(void) { }
> +#endif

Same as for VMX. I didn't check the rest of it, but the same idea should apply.

Cheers,
Alejandro


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] x86/hvm: Add Kconfig option to disable nested virtualization
  2026-02-09 14:55 ` Alejandro Vallejo
@ 2026-02-09 15:01   ` Jan Beulich
  0 siblings, 0 replies; 10+ messages in thread
From: Jan Beulich @ 2026-02-09 15:01 UTC (permalink / raw)
  To: Alejandro Vallejo, Stefano Stabellini
  Cc: roger.pau, andrew.cooper3, jason.andryuk, xen-devel

On 09.02.2026 15:55, Alejandro Vallejo wrote:
> On Fri Feb 6, 2026 at 10:05 PM CET, Stefano Stabellini wrote:
>> --- a/xen/arch/x86/hvm/svm/nestedhvm.h
>> +++ b/xen/arch/x86/hvm/svm/nestedhvm.h
>> @@ -26,6 +26,13 @@
>>  #define nsvm_efer_svm_enabled(v) \
>>      (!!((v)->arch.hvm.guest_efer & EFER_SVME))
> 
> This should be IS_ENABLED(CONFIG_NESTED_VIRT) && (!!((v)->arch.hvm.guest_efer & EFER_SVME))

And then, btw, the !! also dropped and the wrapping opening parenthesis moved.

Jan


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] x86/hvm: Add Kconfig option to disable nested virtualization
  2026-02-06 21:05 [PATCH v2] x86/hvm: Add Kconfig option to disable nested virtualization Stefano Stabellini
                   ` (2 preceding siblings ...)
  2026-02-09 14:55 ` Alejandro Vallejo
@ 2026-02-09 15:09 ` Jan Beulich
  2026-02-13 21:46   ` Stefano Stabellini
  3 siblings, 1 reply; 10+ messages in thread
From: Jan Beulich @ 2026-02-09 15:09 UTC (permalink / raw)
  To: Stefano Stabellini
  Cc: roger.pau, andrew.cooper3, jason.andryuk, alejandro.garciavallejo,
	xen-devel

On 06.02.2026 22:05, Stefano Stabellini wrote:
> --- a/xen/arch/x86/hvm/Kconfig
> +++ b/xen/arch/x86/hvm/Kconfig
> @@ -92,4 +92,14 @@ config MEM_SHARING
>  	bool "Xen memory sharing support (UNSUPPORTED)" if UNSUPPORTED
>  	depends on INTEL_VMX
>  
> +config NESTED_VIRT
> +	bool "Nested virtualization support"
> +	depends on AMD_SVM || INTEL_VMX

Should be HVM? Or else have separate NESTED_SVM and NESTED_VMX?

> +	default n

Please omit such a redundant line.

> --- a/xen/arch/x86/hvm/svm/nestedhvm.h
> +++ b/xen/arch/x86/hvm/svm/nestedhvm.h
> @@ -26,6 +26,13 @@
>  #define nsvm_efer_svm_enabled(v) \
>      (!!((v)->arch.hvm.guest_efer & EFER_SVME))
>  
> +#define NSVM_INTR_NOTHANDLED     3
> +#define NSVM_INTR_NOTINTERCEPTED 2
> +#define NSVM_INTR_FORCEVMEXIT    1
> +#define NSVM_INTR_MASKED         0

It feels suspicious that all of these need moving ...

> +#ifdef CONFIG_NESTED_VIRT

... ahead of this.

> --- a/xen/arch/x86/include/asm/hvm/nestedhvm.h
> +++ b/xen/arch/x86/include/asm/hvm/nestedhvm.h
> @@ -25,9 +25,21 @@ enum nestedhvm_vmexits {
>  /* Nested HVM on/off per domain */
>  static inline bool nestedhvm_enabled(const struct domain *d)
>  {
> -    return IS_ENABLED(CONFIG_HVM) && (d->options & XEN_DOMCTL_CDF_nested_virt);
> +    return IS_ENABLED(CONFIG_NESTED_VIRT) &&
> +           (d->options & XEN_DOMCTL_CDF_nested_virt);
>  }
>  
> +/* Nested paging */
> +#define NESTEDHVM_PAGEFAULT_DONE       0
> +#define NESTEDHVM_PAGEFAULT_INJECT     1
> +#define NESTEDHVM_PAGEFAULT_L1_ERROR   2
> +#define NESTEDHVM_PAGEFAULT_L0_ERROR   3
> +#define NESTEDHVM_PAGEFAULT_MMIO       4
> +#define NESTEDHVM_PAGEFAULT_RETRY      5
> +#define NESTEDHVM_PAGEFAULT_DIRECT_MMIO 6
> +
> +#ifdef CONFIG_NESTED_VIRT

Same here.

> --- a/xen/arch/x86/mm/hap/Makefile
> +++ b/xen/arch/x86/mm/hap/Makefile
> @@ -2,5 +2,5 @@ obj-y += hap.o
>  obj-y += guest_walk_2.o
>  obj-y += guest_walk_3.o
>  obj-y += guest_walk_4.o
> -obj-y += nested_hap.o
> -obj-$(CONFIG_INTEL_VMX) += nested_ept.o
> +obj-$(CONFIG_NESTED_VIRT) += nested_hap.o
> +obj-$(filter $(CONFIG_NESTED_VIRT),$(CONFIG_INTEL_VMX)) += nested_ept.o

Maybe slightly easier to read as

nested-y := nested_hap.o
nested-$(CONFIG_INTEL_VMX) += nested_ept.o
obj-$(CONFIG_NESTED_VIRT) += $(nested-y)

> --- a/xen/arch/x86/sysctl.c
> +++ b/xen/arch/x86/sysctl.c
> @@ -103,6 +103,8 @@ void arch_do_physinfo(struct xen_sysctl_physinfo *pi)
>          pi->capabilities |= XEN_SYSCTL_PHYSCAP_hap;
>      if ( IS_ENABLED(CONFIG_SHADOW_PAGING) )
>          pi->capabilities |= XEN_SYSCTL_PHYSCAP_shadow;
> +    if ( hvm_nested_virt_supported() )
> +        pi->capabilities |= XEN_SYSCTL_PHYSCAP_nestedhvm;
>  }
>  
>  long arch_do_sysctl(
> --- a/xen/include/public/sysctl.h
> +++ b/xen/include/public/sysctl.h
> @@ -100,9 +100,11 @@ struct xen_sysctl_tbuf_op {
>  /* Xen supports the Grant v1 and/or v2 ABIs. */
>  #define XEN_SYSCTL_PHYSCAP_gnttab_v1     (1u << 8)
>  #define XEN_SYSCTL_PHYSCAP_gnttab_v2     (1u << 9)
> +/* The platform supports nested HVM. */
> +#define XEN_SYSCTL_PHYSCAP_nestedhvm     (1u << 10)

Doesn't this want introducing up front, for the tool stack to make use of?

Jan


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] x86/hvm: Add Kconfig option to disable nested virtualization
  2026-02-09 15:09 ` Jan Beulich
@ 2026-02-13 21:46   ` Stefano Stabellini
  2026-02-16  8:57     ` Jan Beulich
  0 siblings, 1 reply; 10+ messages in thread
From: Stefano Stabellini @ 2026-02-13 21:46 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Stefano Stabellini, roger.pau, andrew.cooper3, jason.andryuk,
	alejandro.garciavallejo, xen-devel

On Mon, 9 Feb 2026, Jan Beulich wrote:
> On 06.02.2026 22:05, Stefano Stabellini wrote:
> > --- a/xen/arch/x86/hvm/Kconfig
> > +++ b/xen/arch/x86/hvm/Kconfig
> > @@ -92,4 +92,14 @@ config MEM_SHARING
> >  	bool "Xen memory sharing support (UNSUPPORTED)" if UNSUPPORTED
> >  	depends on INTEL_VMX
> >  
> > +config NESTED_VIRT
> > +	bool "Nested virtualization support"
> > +	depends on AMD_SVM || INTEL_VMX
> 
> Should be HVM? Or else have separate NESTED_SVM and NESTED_VMX?
> 
> > +	default n
> 
> Please omit such a redundant line.

done and done

> > --- a/xen/arch/x86/hvm/svm/nestedhvm.h
> > +++ b/xen/arch/x86/hvm/svm/nestedhvm.h
> > @@ -26,6 +26,13 @@
> >  #define nsvm_efer_svm_enabled(v) \
> >      (!!((v)->arch.hvm.guest_efer & EFER_SVME))
> >  
> > +#define NSVM_INTR_NOTHANDLED     3
> > +#define NSVM_INTR_NOTINTERCEPTED 2
> > +#define NSVM_INTR_FORCEVMEXIT    1
> > +#define NSVM_INTR_MASKED         0
> 
> It feels suspicious that all of these need moving ...
> 
> > +#ifdef CONFIG_NESTED_VIRT
> 
> ... ahead of this.

Not needed anymore


> > --- a/xen/arch/x86/include/asm/hvm/nestedhvm.h
> > +++ b/xen/arch/x86/include/asm/hvm/nestedhvm.h
> > @@ -25,9 +25,21 @@ enum nestedhvm_vmexits {
> >  /* Nested HVM on/off per domain */
> >  static inline bool nestedhvm_enabled(const struct domain *d)
> >  {
> > -    return IS_ENABLED(CONFIG_HVM) && (d->options & XEN_DOMCTL_CDF_nested_virt);
> > +    return IS_ENABLED(CONFIG_NESTED_VIRT) &&
> > +           (d->options & XEN_DOMCTL_CDF_nested_virt);
> >  }
> >  
> > +/* Nested paging */
> > +#define NESTEDHVM_PAGEFAULT_DONE       0
> > +#define NESTEDHVM_PAGEFAULT_INJECT     1
> > +#define NESTEDHVM_PAGEFAULT_L1_ERROR   2
> > +#define NESTEDHVM_PAGEFAULT_L0_ERROR   3
> > +#define NESTEDHVM_PAGEFAULT_MMIO       4
> > +#define NESTEDHVM_PAGEFAULT_RETRY      5
> > +#define NESTEDHVM_PAGEFAULT_DIRECT_MMIO 6
> > +
> > +#ifdef CONFIG_NESTED_VIRT
> 
> Same here.

the stub nestedhvm_hap_nested_page_fault() returns
NESTEDHVM_PAGEFAULT_L0_ERROR, so these defines must be available
unconditionally.


> > --- a/xen/arch/x86/mm/hap/Makefile
> > +++ b/xen/arch/x86/mm/hap/Makefile
> > @@ -2,5 +2,5 @@ obj-y += hap.o
> >  obj-y += guest_walk_2.o
> >  obj-y += guest_walk_3.o
> >  obj-y += guest_walk_4.o
> > -obj-y += nested_hap.o
> > -obj-$(CONFIG_INTEL_VMX) += nested_ept.o
> > +obj-$(CONFIG_NESTED_VIRT) += nested_hap.o
> > +obj-$(filter $(CONFIG_NESTED_VIRT),$(CONFIG_INTEL_VMX)) += nested_ept.o
> 
> Maybe slightly easier to read as
> 
> nested-y := nested_hap.o
> nested-$(CONFIG_INTEL_VMX) += nested_ept.o
> obj-$(CONFIG_NESTED_VIRT) += $(nested-y)

OK
    

> > --- a/xen/arch/x86/sysctl.c
> > +++ b/xen/arch/x86/sysctl.c
> > @@ -103,6 +103,8 @@ void arch_do_physinfo(struct xen_sysctl_physinfo *pi)
> >          pi->capabilities |= XEN_SYSCTL_PHYSCAP_hap;
> >      if ( IS_ENABLED(CONFIG_SHADOW_PAGING) )
> >          pi->capabilities |= XEN_SYSCTL_PHYSCAP_shadow;
> > +    if ( hvm_nested_virt_supported() )
> > +        pi->capabilities |= XEN_SYSCTL_PHYSCAP_nestedhvm;
> >  }
> >  
> >  long arch_do_sysctl(
> > --- a/xen/include/public/sysctl.h
> > +++ b/xen/include/public/sysctl.h
> > @@ -100,9 +100,11 @@ struct xen_sysctl_tbuf_op {
> >  /* Xen supports the Grant v1 and/or v2 ABIs. */
> >  #define XEN_SYSCTL_PHYSCAP_gnttab_v1     (1u << 8)
> >  #define XEN_SYSCTL_PHYSCAP_gnttab_v2     (1u << 9)
> > +/* The platform supports nested HVM. */
> > +#define XEN_SYSCTL_PHYSCAP_nestedhvm     (1u << 10)
> 
> Doesn't this want introducing up front, for the tool stack to make use of?

What do you mean by "up front" in this context? In a separate toolstack
patch?


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] x86/hvm: Add Kconfig option to disable nested virtualization
  2026-02-09 11:08 ` Roger Pau Monné
@ 2026-02-13 21:56   ` Stefano Stabellini
  2026-02-16  9:08     ` Roger Pau Monné
  0 siblings, 1 reply; 10+ messages in thread
From: Stefano Stabellini @ 2026-02-13 21:56 UTC (permalink / raw)
  To: Roger Pau Monné
  Cc: Stefano Stabellini, xen-devel, jbeulich, andrew.cooper3,
	jason.andryuk, alejandro.garciavallejo

[-- Attachment #1: Type: text/plain, Size: 1589 bytes --]

I address all other comments


On Mon, 9 Feb 2026, Roger Pau Monné wrote:
> > +static inline int nvmx_msr_read_intercept(unsigned int msr, u64
> > *msr_content)
> > +{
> > +    ASSERT_UNREACHABLE();
> > +    return 0;
> > +}
> >
> I think this function is reachable even when nested virt is not
> enabled:
> 
> vmx_msr_read_intercept() -> case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_VMFUNC -> nvmx_msr_read_intercept()
> 
> I'm also confused about why the function returns 0 instead of an error
> when !nestedhvm_enabled().  We should probably make it return -ENODEV
> when nested virt is not available or enabled.

I agree on the way of thinking but if we return zero it will goto gp_fault.
So I'll just remove ASSERT_UNREACHABLE.


> > +static inline int nvmx_handle_vmx_insn(struct cpu_user_regs *regs,
> > +                                       unsigned int exit_reason)
> > +{
> > +    ASSERT_UNREACHABLE();
> > +    return 0;
> > +}
> 
> Same here, I think this is likely reachable from vmx_vmexit_handler(),
> and shouldn't assert?
> 
> It should also do something like:
> 
>     hvm_inject_hw_exception(X86_EXC_UD, X86_EVENT_NO_EC);
>     return X86EMUL_EXCEPTION;
> 
> So it mimics what the function itself does when !nestedhvm_enabled().

hvm_inject_hw_exception cannot be easily called here because it is not
available at this point in the header. But actually this function should
be unreachable because when !CONFIG_NESTED_VIRT, CR4.VMXE is not
a valid guest CR4 bit, so nested VMX instructions should cause #UD?

I changed it to:

ASSERT_UNREACHABLE();
return X86EMUL_EXCEPTION;

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] x86/hvm: Add Kconfig option to disable nested virtualization
  2026-02-13 21:46   ` Stefano Stabellini
@ 2026-02-16  8:57     ` Jan Beulich
  0 siblings, 0 replies; 10+ messages in thread
From: Jan Beulich @ 2026-02-16  8:57 UTC (permalink / raw)
  To: Stefano Stabellini
  Cc: Stefano Stabellini, roger.pau, andrew.cooper3, jason.andryuk,
	alejandro.garciavallejo, xen-devel

On 13.02.2026 22:46, Stefano Stabellini wrote:
> On Mon, 9 Feb 2026, Jan Beulich wrote:
>> On 06.02.2026 22:05, Stefano Stabellini wrote:
>>> --- a/xen/arch/x86/sysctl.c
>>> +++ b/xen/arch/x86/sysctl.c
>>> @@ -103,6 +103,8 @@ void arch_do_physinfo(struct xen_sysctl_physinfo *pi)
>>>          pi->capabilities |= XEN_SYSCTL_PHYSCAP_hap;
>>>      if ( IS_ENABLED(CONFIG_SHADOW_PAGING) )
>>>          pi->capabilities |= XEN_SYSCTL_PHYSCAP_shadow;
>>> +    if ( hvm_nested_virt_supported() )
>>> +        pi->capabilities |= XEN_SYSCTL_PHYSCAP_nestedhvm;
>>>  }
>>>  
>>>  long arch_do_sysctl(
>>> --- a/xen/include/public/sysctl.h
>>> +++ b/xen/include/public/sysctl.h
>>> @@ -100,9 +100,11 @@ struct xen_sysctl_tbuf_op {
>>>  /* Xen supports the Grant v1 and/or v2 ABIs. */
>>>  #define XEN_SYSCTL_PHYSCAP_gnttab_v1     (1u << 8)
>>>  #define XEN_SYSCTL_PHYSCAP_gnttab_v2     (1u << 9)
>>> +/* The platform supports nested HVM. */
>>> +#define XEN_SYSCTL_PHYSCAP_nestedhvm     (1u << 10)
>>
>> Doesn't this want introducing up front, for the tool stack to make use of?
> 
> What do you mean by "up front" in this context? In a separate toolstack
> patch?

Yes. Without the toolstack part the introduction of this flag is cosmetic
only, which isn't what we want.

Jan


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] x86/hvm: Add Kconfig option to disable nested virtualization
  2026-02-13 21:56   ` Stefano Stabellini
@ 2026-02-16  9:08     ` Roger Pau Monné
  0 siblings, 0 replies; 10+ messages in thread
From: Roger Pau Monné @ 2026-02-16  9:08 UTC (permalink / raw)
  To: Stefano Stabellini
  Cc: Stefano Stabellini, xen-devel, jbeulich, andrew.cooper3,
	jason.andryuk, alejandro.garciavallejo

On Fri, Feb 13, 2026 at 01:56:34PM -0800, Stefano Stabellini wrote:
> I address all other comments
> 
> 
> On Mon, 9 Feb 2026, Roger Pau Monné wrote:
> > > +static inline int nvmx_msr_read_intercept(unsigned int msr, u64
> > > *msr_content)
> > > +{
> > > +    ASSERT_UNREACHABLE();
> > > +    return 0;
> > > +}
> > >
> > I think this function is reachable even when nested virt is not
> > enabled:
> > 
> > vmx_msr_read_intercept() -> case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_VMFUNC -> nvmx_msr_read_intercept()
> > 
> > I'm also confused about why the function returns 0 instead of an error
> > when !nestedhvm_enabled().  We should probably make it return -ENODEV
> > when nested virt is not available or enabled.

Oh, I see.  The return type of that function is weird.  It should be
adjusted to bool (not that you need to do it here).

> I agree on the way of thinking but if we return zero it will goto gp_fault.
> So I'll just remove ASSERT_UNREACHABLE.

Ack.

> 
> > > +static inline int nvmx_handle_vmx_insn(struct cpu_user_regs *regs,
> > > +                                       unsigned int exit_reason)
> > > +{
> > > +    ASSERT_UNREACHABLE();
> > > +    return 0;
> > > +}
> > 
> > Same here, I think this is likely reachable from vmx_vmexit_handler(),
> > and shouldn't assert?
> > 
> > It should also do something like:
> > 
> >     hvm_inject_hw_exception(X86_EXC_UD, X86_EVENT_NO_EC);
> >     return X86EMUL_EXCEPTION;
> > 
> > So it mimics what the function itself does when !nestedhvm_enabled().
> 
> hvm_inject_hw_exception cannot be easily called here because it is not
> available at this point in the header. But actually this function should
> be unreachable because when !CONFIG_NESTED_VIRT, CR4.VMXE is not
> a valid guest CR4 bit, so nested VMX instructions should cause #UD?

I'm not sure about nvmx_handle_vmx_insn() being unreachable when
CR4.VMXE is not set, the Intel SDM states:

"26.1.2 Instructions That Cause VM Exits Unconditionally

The following instructions cause VM exits when they are executed in
VMX non-root operation: CPUID, GETSEC,1 INVD, and XSETBV. This is also
true of instructions introduced with VMX, which include: INVEPT,
INVVPID, VMCALL,2 VMCLEAR, VMLAUNCH, VMPTRLD, VMPTRST, VMRESUME,
VMXOFF, and VMXON."

My reading is that regardless of the value of CR4.VMXE the execution
of VMX instructions will cause a VMEXIT, and it's the hypervisor task
to figure out what to do.

> 
> I changed it to:
> 
> ASSERT_UNREACHABLE();
> return X86EMUL_EXCEPTION;

If the above is correct, the ASSERT_UNREACHABLE() is reachable.

Returning X86EMUL_EXCEPTION without actually having injected the
exception will result in a loop I think, as vmx_vmexit_handler() will
resume guest execution without having advanced the instruction
pointer, and without having injected an exception, and hence another
VMEXIT will trigger.

IMO the best way to solve this is to make vmx_vmexit_handler() inject
#UD when nvmx_handle_vmx_insn() returns X86EMUL_EXCEPTION. You will
need to then also adjust the non-stub version of
nvmx_handle_vmx_insn(), so it doesn't inject #UD itself.

Thanks, Roger.


^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2026-02-16  9:09 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-02-06 21:05 [PATCH v2] x86/hvm: Add Kconfig option to disable nested virtualization Stefano Stabellini
2026-02-08  2:31 ` Demi Marie Obenour
2026-02-09 11:08 ` Roger Pau Monné
2026-02-13 21:56   ` Stefano Stabellini
2026-02-16  9:08     ` Roger Pau Monné
2026-02-09 14:55 ` Alejandro Vallejo
2026-02-09 15:01   ` Jan Beulich
2026-02-09 15:09 ` Jan Beulich
2026-02-13 21:46   ` Stefano Stabellini
2026-02-16  8:57     ` Jan Beulich

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.