All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] x86/hvm: Add Kconfig option to disable nested virtualization
@ 2026-02-06  1:50 Stefano Stabellini
  2026-02-06  4:07 ` Demi Marie Obenour
  2026-02-06  8:15 ` Roger Pau Monné
  0 siblings, 2 replies; 10+ messages in thread
From: Stefano Stabellini @ 2026-02-06  1:50 UTC (permalink / raw)
  To: xen-devel
  Cc: jbeulich, andrew.cooper3, roger.pau, jason.andryuk,
	alejandro.garciavallejo, Stefano Stabellini

Introduce CONFIG_NESTED_VIRT (default y, requires EXPERT to disable)
to allow nested virtualization support to be disabled at build time.
This is useful for embedded or safety-focused deployments where
nested virtualization is not needed, reducing code size and attack
surface.

When CONFIG_NESTED_VIRT=n, the following source files are excluded:
- arch/x86/hvm/nestedhvm.c
- arch/x86/hvm/svm/nestedsvm.c
- arch/x86/hvm/vmx/vvmx.c
- arch/x86/mm/nested.c
- arch/x86/mm/hap/nested_hap.c
- arch/x86/mm/hap/nested_ept.c

Add inline stubs where needed in headers.

No functional change when CONFIG_NESTED_VIRT=y.

Signed-off-by: Stefano Stabellini <stefano.stabellini@amd.com>
---
 xen/arch/x86/hvm/Kconfig                 | 10 ++++++
 xen/arch/x86/hvm/Makefile                |  2 +-
 xen/arch/x86/hvm/svm/Makefile            |  2 +-
 xen/arch/x86/hvm/svm/nestedhvm.h         | 44 +++++++++++++++++++++---
 xen/arch/x86/hvm/svm/svm.c               |  6 ++++
 xen/arch/x86/hvm/vmx/Makefile            |  2 +-
 xen/arch/x86/hvm/vmx/vmx.c               | 10 ++++--
 xen/arch/x86/include/asm/hvm/nestedhvm.h | 41 +++++++++++++++++-----
 xen/arch/x86/include/asm/hvm/vmx/vvmx.h  | 30 ++++++++++++++++
 xen/arch/x86/mm/Makefile                 |  2 +-
 xen/arch/x86/mm/hap/Makefile             |  4 +--
 xen/arch/x86/mm/p2m.h                    |  6 ++++
 12 files changed, 137 insertions(+), 22 deletions(-)

diff --git a/xen/arch/x86/hvm/Kconfig b/xen/arch/x86/hvm/Kconfig
index f32bf5cbb7..12b5df4710 100644
--- a/xen/arch/x86/hvm/Kconfig
+++ b/xen/arch/x86/hvm/Kconfig
@@ -92,4 +92,14 @@ config MEM_SHARING
 	bool "Xen memory sharing support (UNSUPPORTED)" if UNSUPPORTED
 	depends on INTEL_VMX
 
+config NESTED_VIRT
+	bool "Nested virtualization support" if EXPERT
+	depends on AMD_SVM || INTEL_VMX
+	default y
+	help
+	  Enable nested virtualization, allowing guests to run their own
+	  hypervisors. This requires hardware support.
+
+	  If unsure, say Y.
+
 endif
diff --git a/xen/arch/x86/hvm/Makefile b/xen/arch/x86/hvm/Makefile
index f34fb03934..b8a0a68624 100644
--- a/xen/arch/x86/hvm/Makefile
+++ b/xen/arch/x86/hvm/Makefile
@@ -18,7 +18,7 @@ obj-y += irq.o
 obj-y += mmio.o
 obj-$(CONFIG_VM_EVENT) += monitor.o
 obj-y += mtrr.o
-obj-y += nestedhvm.o
+obj-$(CONFIG_NESTED_VIRT) += nestedhvm.o
 obj-y += pmtimer.o
 obj-y += quirks.o
 obj-y += rtc.o
diff --git a/xen/arch/x86/hvm/svm/Makefile b/xen/arch/x86/hvm/svm/Makefile
index 8a072cafd5..92418e3444 100644
--- a/xen/arch/x86/hvm/svm/Makefile
+++ b/xen/arch/x86/hvm/svm/Makefile
@@ -2,6 +2,6 @@ obj-y += asid.o
 obj-y += emulate.o
 obj-bin-y += entry.o
 obj-y += intr.o
-obj-y += nestedsvm.o
+obj-$(CONFIG_NESTED_VIRT) += nestedsvm.o
 obj-y += svm.o
 obj-y += vmcb.o
diff --git a/xen/arch/x86/hvm/svm/nestedhvm.h b/xen/arch/x86/hvm/svm/nestedhvm.h
index 9bfed5ffd7..a102c076ea 100644
--- a/xen/arch/x86/hvm/svm/nestedhvm.h
+++ b/xen/arch/x86/hvm/svm/nestedhvm.h
@@ -26,6 +26,13 @@
 #define nsvm_efer_svm_enabled(v) \
     (!!((v)->arch.hvm.guest_efer & EFER_SVME))
 
+#define NSVM_INTR_NOTHANDLED     3
+#define NSVM_INTR_NOTINTERCEPTED 2
+#define NSVM_INTR_FORCEVMEXIT    1
+#define NSVM_INTR_MASKED         0
+
+#ifdef CONFIG_NESTED_VIRT
+
 int nestedsvm_vmcb_map(struct vcpu *v, uint64_t vmcbaddr);
 void nestedsvm_vmexit_defer(struct vcpu *v,
     uint64_t exitcode, uint64_t exitinfo1, uint64_t exitinfo2);
@@ -57,13 +64,40 @@ int cf_check nsvm_hap_walk_L1_p2m(
     struct vcpu *v, paddr_t L2_gpa, paddr_t *L1_gpa, unsigned int *page_order,
     uint8_t *p2m_acc, struct npfec npfec);
 
-#define NSVM_INTR_NOTHANDLED     3
-#define NSVM_INTR_NOTINTERCEPTED 2
-#define NSVM_INTR_FORCEVMEXIT    1
-#define NSVM_INTR_MASKED         0
-
 int nestedsvm_vcpu_interrupt(struct vcpu *v, const struct hvm_intack intack);
 
+#else /* !CONFIG_NESTED_VIRT */
+
+static inline int nestedsvm_vmcb_map(struct vcpu *v, uint64_t vmcbaddr)
+{
+    return 0;
+}
+static inline void nestedsvm_vmexit_defer(struct vcpu *v,
+    uint64_t exitcode, uint64_t exitinfo1, uint64_t exitinfo2) { }
+static inline enum nestedhvm_vmexits nestedsvm_vmexit_n2n1(struct vcpu *v,
+    struct cpu_user_regs *regs)
+{
+    return NESTEDHVM_VMEXIT_ERROR;
+}
+static inline enum nestedhvm_vmexits nestedsvm_check_intercepts(struct vcpu *v,
+    struct cpu_user_regs *regs, uint64_t exitcode)
+{
+    return NESTEDHVM_VMEXIT_ERROR;
+}
+static inline void svm_nested_features_on_efer_update(struct vcpu *v) { }
+static inline void svm_vmexit_do_clgi(struct cpu_user_regs *regs,
+                                      struct vcpu *v) { }
+static inline void svm_vmexit_do_stgi(struct cpu_user_regs *regs,
+                                       struct vcpu *v) { }
+static inline bool nestedsvm_gif_isset(struct vcpu *v) { return true; }
+static inline int nestedsvm_vcpu_interrupt(struct vcpu *v,
+                                           const struct hvm_intack intack)
+{
+    return NSVM_INTR_NOTINTERCEPTED;
+}
+
+#endif /* CONFIG_NESTED_VIRT */
+
 #endif /* __X86_HVM_SVM_NESTEDHVM_PRIV_H__ */
 
 /*
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index 18ba837738..0234b57afb 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -46,6 +46,10 @@
 
 void noreturn svm_asm_do_resume(void);
 
+#ifndef CONFIG_NESTED_VIRT
+void asmlinkage nsvm_vcpu_switch(void) { }
+#endif
+
 u32 svm_feature_flags;
 
 /*
@@ -2465,6 +2469,7 @@ static struct hvm_function_table __initdata_cf_clobber svm_function_table = {
     .set_rdtsc_exiting    = svm_set_rdtsc_exiting,
     .get_insn_bytes       = svm_get_insn_bytes,
 
+#ifdef CONFIG_NESTED_VIRT
     .nhvm_vcpu_initialise = nsvm_vcpu_initialise,
     .nhvm_vcpu_destroy = nsvm_vcpu_destroy,
     .nhvm_vcpu_reset = nsvm_vcpu_reset,
@@ -2474,6 +2479,7 @@ static struct hvm_function_table __initdata_cf_clobber svm_function_table = {
     .nhvm_vmcx_hap_enabled = nsvm_vmcb_hap_enabled,
     .nhvm_intr_blocked = nsvm_intr_blocked,
     .nhvm_hap_walk_L1_p2m = nsvm_hap_walk_L1_p2m,
+#endif
 
     .get_reg = svm_get_reg,
     .set_reg = svm_set_reg,
diff --git a/xen/arch/x86/hvm/vmx/Makefile b/xen/arch/x86/hvm/vmx/Makefile
index 04a29ce59d..902564b3e2 100644
--- a/xen/arch/x86/hvm/vmx/Makefile
+++ b/xen/arch/x86/hvm/vmx/Makefile
@@ -3,4 +3,4 @@ obj-y += intr.o
 obj-y += realmode.o
 obj-y += vmcs.o
 obj-y += vmx.o
-obj-y += vvmx.o
+obj-$(CONFIG_NESTED_VIRT) += vvmx.o
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 82c55f49ae..252f27322b 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -55,6 +55,10 @@
 #include <public/hvm/save.h>
 #include <public/sched.h>
 
+#ifndef CONFIG_NESTED_VIRT
+void asmlinkage nvmx_switch_guest(void) { }
+#endif
+
 static bool __initdata opt_force_ept;
 boolean_param("force-ept", opt_force_ept);
 
@@ -2033,7 +2037,7 @@ static void nvmx_enqueue_n2_exceptions(struct vcpu *v,
                  nvmx->intr.intr_info, nvmx->intr.error_code);
 }
 
-static int cf_check nvmx_vmexit_event(
+static int cf_check __maybe_unused nvmx_vmexit_event(
     struct vcpu *v, const struct x86_event *event)
 {
     nvmx_enqueue_n2_exceptions(v, event->vector, event->error_code,
@@ -2933,6 +2937,7 @@ static struct hvm_function_table __initdata_cf_clobber vmx_function_table = {
     .handle_cd            = vmx_handle_cd,
     .set_info_guest       = vmx_set_info_guest,
     .set_rdtsc_exiting    = vmx_set_rdtsc_exiting,
+#ifdef CONFIG_NESTED_VIRT
     .nhvm_vcpu_initialise = nvmx_vcpu_initialise,
     .nhvm_vcpu_destroy    = nvmx_vcpu_destroy,
     .nhvm_vcpu_reset      = nvmx_vcpu_reset,
@@ -2942,8 +2947,9 @@ static struct hvm_function_table __initdata_cf_clobber vmx_function_table = {
     .nhvm_vcpu_vmexit_event = nvmx_vmexit_event,
     .nhvm_intr_blocked    = nvmx_intr_blocked,
     .nhvm_domain_relinquish_resources = nvmx_domain_relinquish_resources,
-    .update_vlapic_mode = vmx_vlapic_msr_changed,
     .nhvm_hap_walk_L1_p2m = nvmx_hap_walk_L1_p2m,
+#endif
+    .update_vlapic_mode = vmx_vlapic_msr_changed,
 #ifdef CONFIG_VM_EVENT
     .enable_msr_interception = vmx_enable_msr_interception,
 #endif
diff --git a/xen/arch/x86/include/asm/hvm/nestedhvm.h b/xen/arch/x86/include/asm/hvm/nestedhvm.h
index ea2c1bc328..0372974b24 100644
--- a/xen/arch/x86/include/asm/hvm/nestedhvm.h
+++ b/xen/arch/x86/include/asm/hvm/nestedhvm.h
@@ -25,9 +25,21 @@ enum nestedhvm_vmexits {
 /* Nested HVM on/off per domain */
 static inline bool nestedhvm_enabled(const struct domain *d)
 {
-    return IS_ENABLED(CONFIG_HVM) && (d->options & XEN_DOMCTL_CDF_nested_virt);
+    return IS_ENABLED(CONFIG_NESTED_VIRT) &&
+           (d->options & XEN_DOMCTL_CDF_nested_virt);
 }
 
+/* Nested paging */
+#define NESTEDHVM_PAGEFAULT_DONE       0
+#define NESTEDHVM_PAGEFAULT_INJECT     1
+#define NESTEDHVM_PAGEFAULT_L1_ERROR   2
+#define NESTEDHVM_PAGEFAULT_L0_ERROR   3
+#define NESTEDHVM_PAGEFAULT_MMIO       4
+#define NESTEDHVM_PAGEFAULT_RETRY      5
+#define NESTEDHVM_PAGEFAULT_DIRECT_MMIO 6
+
+#ifdef CONFIG_NESTED_VIRT
+
 /* Nested VCPU */
 int nestedhvm_vcpu_initialise(struct vcpu *v);
 void nestedhvm_vcpu_destroy(struct vcpu *v);
@@ -38,14 +50,6 @@ bool nestedhvm_vcpu_in_guestmode(struct vcpu *v);
 #define nestedhvm_vcpu_exit_guestmode(v)  \
     vcpu_nestedhvm(v).nv_guestmode = 0
 
-/* Nested paging */
-#define NESTEDHVM_PAGEFAULT_DONE       0
-#define NESTEDHVM_PAGEFAULT_INJECT     1
-#define NESTEDHVM_PAGEFAULT_L1_ERROR   2
-#define NESTEDHVM_PAGEFAULT_L0_ERROR   3
-#define NESTEDHVM_PAGEFAULT_MMIO       4
-#define NESTEDHVM_PAGEFAULT_RETRY      5
-#define NESTEDHVM_PAGEFAULT_DIRECT_MMIO 6
 int nestedhvm_hap_nested_page_fault(struct vcpu *v, paddr_t *L2_gpa,
                                     struct npfec npfec);
 
@@ -59,6 +63,25 @@ unsigned long *nestedhvm_vcpu_iomap_get(bool ioport_80, bool ioport_ed);
 
 void nestedhvm_vmcx_flushtlb(struct p2m_domain *p2m);
 
+#else /* !CONFIG_NESTED_VIRT */
+
+static inline int nestedhvm_vcpu_initialise(struct vcpu *v) { return 0; }
+static inline void nestedhvm_vcpu_destroy(struct vcpu *v) { }
+static inline void nestedhvm_vcpu_reset(struct vcpu *v) { }
+static inline bool nestedhvm_vcpu_in_guestmode(struct vcpu *v) { return false; }
+static inline int nestedhvm_hap_nested_page_fault(struct vcpu *v, paddr_t *L2_gpa,
+                                                  struct npfec npfec)
+{
+    return NESTEDHVM_PAGEFAULT_L0_ERROR;
+}
+#define nestedhvm_vcpu_enter_guestmode(v) do { } while (0)
+#define nestedhvm_vcpu_exit_guestmode(v)  do { } while (0)
+#define nestedhvm_paging_mode_hap(v) false
+#define nestedhvm_vmswitch_in_progress(v) false
+static inline void nestedhvm_vmcx_flushtlb(struct p2m_domain *p2m) { }
+
+#endif /* CONFIG_NESTED_VIRT */
+
 static inline bool nestedhvm_is_n2(struct vcpu *v)
 {
     if ( !nestedhvm_enabled(v->domain) ||
diff --git a/xen/arch/x86/include/asm/hvm/vmx/vvmx.h b/xen/arch/x86/include/asm/hvm/vmx/vvmx.h
index da10d3fa96..8dc876a4c2 100644
--- a/xen/arch/x86/include/asm/hvm/vmx/vvmx.h
+++ b/xen/arch/x86/include/asm/hvm/vmx/vvmx.h
@@ -73,6 +73,8 @@ union vmx_inst_info {
     u32 word;
 };
 
+#ifdef CONFIG_NESTED_VIRT
+
 int cf_check nvmx_vcpu_initialise(struct vcpu *v);
 void cf_check nvmx_vcpu_destroy(struct vcpu *v);
 int cf_check nvmx_vcpu_reset(struct vcpu *v);
@@ -199,5 +201,33 @@ int nept_translate_l2ga(struct vcpu *v, paddr_t l2ga,
                         uint64_t *exit_qual, uint32_t *exit_reason);
 int nvmx_cpu_up_prepare(unsigned int cpu);
 void nvmx_cpu_dead(unsigned int cpu);
+
+#else /* !CONFIG_NESTED_VIRT */
+
+static inline void nvmx_update_exec_control(struct vcpu *v, u32 value) { }
+static inline void nvmx_update_secondary_exec_control(struct vcpu *v,
+                                                      unsigned long value) { }
+static inline void nvmx_update_exception_bitmap(struct vcpu *v,
+                                                unsigned long value) { }
+static inline u64 nvmx_get_tsc_offset(struct vcpu *v) { return 0; }
+static inline void nvmx_set_cr_read_shadow(struct vcpu *v, unsigned int cr) { }
+static inline bool nvmx_intercepts_exception(struct vcpu *v, unsigned int vector,
+                                             int error_code) { return false; }
+static inline int nvmx_n2_vmexit_handler(struct cpu_user_regs *regs,
+                                         unsigned int exit_reason) { return 0; }
+static inline void nvmx_idtv_handling(void) { }
+static inline int nvmx_msr_read_intercept(unsigned int msr, u64 *msr_content)
+{
+    return 0;
+}
+static inline int nvmx_handle_vmx_insn(struct cpu_user_regs *regs,
+                                       unsigned int exit_reason) { return 0; }
+static inline int nvmx_cpu_up_prepare(unsigned int cpu) { return 0; }
+static inline void nvmx_cpu_dead(unsigned int cpu) { }
+
+#define get_vvmcs(vcpu, encoding) 0
+
+#endif /* CONFIG_NESTED_VIRT */
+
 #endif /* __ASM_X86_HVM_VVMX_H__ */
 
diff --git a/xen/arch/x86/mm/Makefile b/xen/arch/x86/mm/Makefile
index 960f6e8409..aa15811c2e 100644
--- a/xen/arch/x86/mm/Makefile
+++ b/xen/arch/x86/mm/Makefile
@@ -7,7 +7,7 @@ obj-$(CONFIG_SHADOW_PAGING) += guest_walk_4.o
 obj-$(CONFIG_VM_EVENT) += mem_access.o
 obj-$(CONFIG_MEM_PAGING) += mem_paging.o
 obj-$(CONFIG_MEM_SHARING) += mem_sharing.o
-obj-$(CONFIG_HVM) += nested.o
+obj-$(CONFIG_NESTED_VIRT) += nested.o
 obj-$(CONFIG_HVM) += p2m.o
 obj-y += p2m-basic.o
 obj-$(CONFIG_INTEL_VMX) += p2m-ept.o
diff --git a/xen/arch/x86/mm/hap/Makefile b/xen/arch/x86/mm/hap/Makefile
index 67c29b2162..de1bb3abde 100644
--- a/xen/arch/x86/mm/hap/Makefile
+++ b/xen/arch/x86/mm/hap/Makefile
@@ -2,5 +2,5 @@ obj-y += hap.o
 obj-y += guest_walk_2.o
 obj-y += guest_walk_3.o
 obj-y += guest_walk_4.o
-obj-y += nested_hap.o
-obj-$(CONFIG_INTEL_VMX) += nested_ept.o
+obj-$(CONFIG_NESTED_VIRT) += nested_hap.o
+obj-$(CONFIG_NESTED_VIRT) += nested_ept.o
diff --git a/xen/arch/x86/mm/p2m.h b/xen/arch/x86/mm/p2m.h
index 635f5a7f45..fa14e69fff 100644
--- a/xen/arch/x86/mm/p2m.h
+++ b/xen/arch/x86/mm/p2m.h
@@ -25,9 +25,15 @@ void p2m_teardown_altp2m(struct domain *d);
 void p2m_flush_table_locked(struct p2m_domain *p2m);
 int __must_check p2m_remove_entry(struct p2m_domain *p2m, gfn_t gfn, mfn_t mfn,
                                   unsigned int page_order);
+#ifdef CONFIG_NESTED_VIRT
 void p2m_nestedp2m_init(struct p2m_domain *p2m);
 int p2m_init_nestedp2m(struct domain *d);
 void p2m_teardown_nestedp2m(struct domain *d);
+#else
+static inline void p2m_nestedp2m_init(struct p2m_domain *p2m) {}
+static inline int p2m_init_nestedp2m(struct domain *d) { return 0; }
+static inline void p2m_teardown_nestedp2m(struct domain *d) {}
+#endif
 
 int ept_p2m_init(struct p2m_domain *p2m);
 void ept_p2m_uninit(struct p2m_domain *p2m);
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH] x86/hvm: Add Kconfig option to disable nested virtualization
  2026-02-06  1:50 [PATCH] x86/hvm: Add Kconfig option to disable nested virtualization Stefano Stabellini
@ 2026-02-06  4:07 ` Demi Marie Obenour
  2026-02-06  8:15 ` Roger Pau Monné
  1 sibling, 0 replies; 10+ messages in thread
From: Demi Marie Obenour @ 2026-02-06  4:07 UTC (permalink / raw)
  To: Stefano Stabellini, xen-devel
  Cc: jbeulich, andrew.cooper3, roger.pau, jason.andryuk,
	alejandro.garciavallejo


[-- Attachment #1.1.1: Type: text/plain, Size: 16210 bytes --]

On 2/5/26 20:50, Stefano Stabellini wrote:
> Introduce CONFIG_NESTED_VIRT (default y, requires EXPERT to disable)
> to allow nested virtualization support to be disabled at build time.
> This is useful for embedded or safety-focused deployments where
> nested virtualization is not needed, reducing code size and attack
> surface.
> 
> When CONFIG_NESTED_VIRT=n, the following source files are excluded:
> - arch/x86/hvm/nestedhvm.c
> - arch/x86/hvm/svm/nestedsvm.c
> - arch/x86/hvm/vmx/vvmx.c
> - arch/x86/mm/nested.c
> - arch/x86/mm/hap/nested_hap.c
> - arch/x86/mm/hap/nested_ept.c
> 
> Add inline stubs where needed in headers.
> 
> No functional change when CONFIG_NESTED_VIRT=y.
> 
> Signed-off-by: Stefano Stabellini <stefano.stabellini@amd.com>
> ---
>  xen/arch/x86/hvm/Kconfig                 | 10 ++++++
>  xen/arch/x86/hvm/Makefile                |  2 +-
>  xen/arch/x86/hvm/svm/Makefile            |  2 +-
>  xen/arch/x86/hvm/svm/nestedhvm.h         | 44 +++++++++++++++++++++---
>  xen/arch/x86/hvm/svm/svm.c               |  6 ++++
>  xen/arch/x86/hvm/vmx/Makefile            |  2 +-
>  xen/arch/x86/hvm/vmx/vmx.c               | 10 ++++--
>  xen/arch/x86/include/asm/hvm/nestedhvm.h | 41 +++++++++++++++++-----
>  xen/arch/x86/include/asm/hvm/vmx/vvmx.h  | 30 ++++++++++++++++
>  xen/arch/x86/mm/Makefile                 |  2 +-
>  xen/arch/x86/mm/hap/Makefile             |  4 +--
>  xen/arch/x86/mm/p2m.h                    |  6 ++++
>  12 files changed, 137 insertions(+), 22 deletions(-)
> 
> diff --git a/xen/arch/x86/hvm/Kconfig b/xen/arch/x86/hvm/Kconfig
> index f32bf5cbb7..12b5df4710 100644
> --- a/xen/arch/x86/hvm/Kconfig
> +++ b/xen/arch/x86/hvm/Kconfig
> @@ -92,4 +92,14 @@ config MEM_SHARING
>  	bool "Xen memory sharing support (UNSUPPORTED)" if UNSUPPORTED
>  	depends on INTEL_VMX
>  
> +config NESTED_VIRT
> +	bool "Nested virtualization support" if EXPERT
> +	depends on AMD_SVM || INTEL_VMX
> +	default y

Would it be possible to make this

	default n

instead?  If not, could the dependency on EXPERT be removed?

> +	help
> +	  Enable nested virtualization, allowing guests to run their own
> +	  hypervisors. This requires hardware support.
> +
> +	  If unsure, say Y.

I would go with:

	Xen's support is known to be buggy and insecure. If unsure, say N.

> +
>  endif
> diff --git a/xen/arch/x86/hvm/Makefile b/xen/arch/x86/hvm/Makefile
> index f34fb03934..b8a0a68624 100644
> --- a/xen/arch/x86/hvm/Makefile
> +++ b/xen/arch/x86/hvm/Makefile
> @@ -18,7 +18,7 @@ obj-y += irq.o
>  obj-y += mmio.o
>  obj-$(CONFIG_VM_EVENT) += monitor.o
>  obj-y += mtrr.o
> -obj-y += nestedhvm.o
> +obj-$(CONFIG_NESTED_VIRT) += nestedhvm.o
>  obj-y += pmtimer.o
>  obj-y += quirks.o
>  obj-y += rtc.o
> diff --git a/xen/arch/x86/hvm/svm/Makefile b/xen/arch/x86/hvm/svm/Makefile
> index 8a072cafd5..92418e3444 100644
> --- a/xen/arch/x86/hvm/svm/Makefile
> +++ b/xen/arch/x86/hvm/svm/Makefile
> @@ -2,6 +2,6 @@ obj-y += asid.o
>  obj-y += emulate.o
>  obj-bin-y += entry.o
>  obj-y += intr.o
> -obj-y += nestedsvm.o
> +obj-$(CONFIG_NESTED_VIRT) += nestedsvm.o
>  obj-y += svm.o
>  obj-y += vmcb.o
> diff --git a/xen/arch/x86/hvm/svm/nestedhvm.h b/xen/arch/x86/hvm/svm/nestedhvm.h
> index 9bfed5ffd7..a102c076ea 100644
> --- a/xen/arch/x86/hvm/svm/nestedhvm.h
> +++ b/xen/arch/x86/hvm/svm/nestedhvm.h
> @@ -26,6 +26,13 @@
>  #define nsvm_efer_svm_enabled(v) \
>      (!!((v)->arch.hvm.guest_efer & EFER_SVME))
>  
> +#define NSVM_INTR_NOTHANDLED     3
> +#define NSVM_INTR_NOTINTERCEPTED 2
> +#define NSVM_INTR_FORCEVMEXIT    1
> +#define NSVM_INTR_MASKED         0
> +
> +#ifdef CONFIG_NESTED_VIRT
> +
>  int nestedsvm_vmcb_map(struct vcpu *v, uint64_t vmcbaddr);
>  void nestedsvm_vmexit_defer(struct vcpu *v,
>      uint64_t exitcode, uint64_t exitinfo1, uint64_t exitinfo2);
> @@ -57,13 +64,40 @@ int cf_check nsvm_hap_walk_L1_p2m(
>      struct vcpu *v, paddr_t L2_gpa, paddr_t *L1_gpa, unsigned int *page_order,
>      uint8_t *p2m_acc, struct npfec npfec);
>  
> -#define NSVM_INTR_NOTHANDLED     3
> -#define NSVM_INTR_NOTINTERCEPTED 2
> -#define NSVM_INTR_FORCEVMEXIT    1
> -#define NSVM_INTR_MASKED         0
> -
>  int nestedsvm_vcpu_interrupt(struct vcpu *v, const struct hvm_intack intack);
>  
> +#else /* !CONFIG_NESTED_VIRT */
> +
> +static inline int nestedsvm_vmcb_map(struct vcpu *v, uint64_t vmcbaddr)
> +{
> +    return 0;
> +}
> +static inline void nestedsvm_vmexit_defer(struct vcpu *v,
> +    uint64_t exitcode, uint64_t exitinfo1, uint64_t exitinfo2) { }
> +static inline enum nestedhvm_vmexits nestedsvm_vmexit_n2n1(struct vcpu *v,
> +    struct cpu_user_regs *regs)
> +{
> +    return NESTEDHVM_VMEXIT_ERROR;
> +}
> +static inline enum nestedhvm_vmexits nestedsvm_check_intercepts(struct vcpu *v,
> +    struct cpu_user_regs *regs, uint64_t exitcode)
> +{
> +    return NESTEDHVM_VMEXIT_ERROR;
> +}
> +static inline void svm_nested_features_on_efer_update(struct vcpu *v) { }
> +static inline void svm_vmexit_do_clgi(struct cpu_user_regs *regs,
> +                                      struct vcpu *v) { }
> +static inline void svm_vmexit_do_stgi(struct cpu_user_regs *regs,
> +                                       struct vcpu *v) { }
> +static inline bool nestedsvm_gif_isset(struct vcpu *v) { return true; }
> +static inline int nestedsvm_vcpu_interrupt(struct vcpu *v,
> +                                           const struct hvm_intack intack)
> +{
> +    return NSVM_INTR_NOTINTERCEPTED;
> +}
> +
> +#endif /* CONFIG_NESTED_VIRT */
> +
>  #endif /* __X86_HVM_SVM_NESTEDHVM_PRIV_H__ */
>  
>  /*
> diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
> index 18ba837738..0234b57afb 100644
> --- a/xen/arch/x86/hvm/svm/svm.c
> +++ b/xen/arch/x86/hvm/svm/svm.c
> @@ -46,6 +46,10 @@
>  
>  void noreturn svm_asm_do_resume(void);
>  
> +#ifndef CONFIG_NESTED_VIRT
> +void asmlinkage nsvm_vcpu_switch(void) { }
> +#endif
> +
>  u32 svm_feature_flags;
>  
>  /*
> @@ -2465,6 +2469,7 @@ static struct hvm_function_table __initdata_cf_clobber svm_function_table = {
>      .set_rdtsc_exiting    = svm_set_rdtsc_exiting,
>      .get_insn_bytes       = svm_get_insn_bytes,
>  
> +#ifdef CONFIG_NESTED_VIRT
>      .nhvm_vcpu_initialise = nsvm_vcpu_initialise,
>      .nhvm_vcpu_destroy = nsvm_vcpu_destroy,
>      .nhvm_vcpu_reset = nsvm_vcpu_reset,
> @@ -2474,6 +2479,7 @@ static struct hvm_function_table __initdata_cf_clobber svm_function_table = {
>      .nhvm_vmcx_hap_enabled = nsvm_vmcb_hap_enabled,
>      .nhvm_intr_blocked = nsvm_intr_blocked,
>      .nhvm_hap_walk_L1_p2m = nsvm_hap_walk_L1_p2m,
> +#endif
>  
>      .get_reg = svm_get_reg,
>      .set_reg = svm_set_reg,
> diff --git a/xen/arch/x86/hvm/vmx/Makefile b/xen/arch/x86/hvm/vmx/Makefile
> index 04a29ce59d..902564b3e2 100644
> --- a/xen/arch/x86/hvm/vmx/Makefile
> +++ b/xen/arch/x86/hvm/vmx/Makefile
> @@ -3,4 +3,4 @@ obj-y += intr.o
>  obj-y += realmode.o
>  obj-y += vmcs.o
>  obj-y += vmx.o
> -obj-y += vvmx.o
> +obj-$(CONFIG_NESTED_VIRT) += vvmx.o
> diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
> index 82c55f49ae..252f27322b 100644
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -55,6 +55,10 @@
>  #include <public/hvm/save.h>
>  #include <public/sched.h>
>  
> +#ifndef CONFIG_NESTED_VIRT
> +void asmlinkage nvmx_switch_guest(void) { }
> +#endif
> +
>  static bool __initdata opt_force_ept;
>  boolean_param("force-ept", opt_force_ept);
>  
> @@ -2033,7 +2037,7 @@ static void nvmx_enqueue_n2_exceptions(struct vcpu *v,
>                   nvmx->intr.intr_info, nvmx->intr.error_code);
>  }
>  
> -static int cf_check nvmx_vmexit_event(
> +static int cf_check __maybe_unused nvmx_vmexit_event(
>      struct vcpu *v, const struct x86_event *event)
>  {
>      nvmx_enqueue_n2_exceptions(v, event->vector, event->error_code,
> @@ -2933,6 +2937,7 @@ static struct hvm_function_table __initdata_cf_clobber vmx_function_table = {
>      .handle_cd            = vmx_handle_cd,
>      .set_info_guest       = vmx_set_info_guest,
>      .set_rdtsc_exiting    = vmx_set_rdtsc_exiting,
> +#ifdef CONFIG_NESTED_VIRT
>      .nhvm_vcpu_initialise = nvmx_vcpu_initialise,
>      .nhvm_vcpu_destroy    = nvmx_vcpu_destroy,
>      .nhvm_vcpu_reset      = nvmx_vcpu_reset,
> @@ -2942,8 +2947,9 @@ static struct hvm_function_table __initdata_cf_clobber vmx_function_table = {
>      .nhvm_vcpu_vmexit_event = nvmx_vmexit_event,
>      .nhvm_intr_blocked    = nvmx_intr_blocked,
>      .nhvm_domain_relinquish_resources = nvmx_domain_relinquish_resources,
> -    .update_vlapic_mode = vmx_vlapic_msr_changed,
>      .nhvm_hap_walk_L1_p2m = nvmx_hap_walk_L1_p2m,
> +#endif
> +    .update_vlapic_mode = vmx_vlapic_msr_changed,
>  #ifdef CONFIG_VM_EVENT
>      .enable_msr_interception = vmx_enable_msr_interception,
>  #endif
> diff --git a/xen/arch/x86/include/asm/hvm/nestedhvm.h b/xen/arch/x86/include/asm/hvm/nestedhvm.h
> index ea2c1bc328..0372974b24 100644
> --- a/xen/arch/x86/include/asm/hvm/nestedhvm.h
> +++ b/xen/arch/x86/include/asm/hvm/nestedhvm.h
> @@ -25,9 +25,21 @@ enum nestedhvm_vmexits {
>  /* Nested HVM on/off per domain */
>  static inline bool nestedhvm_enabled(const struct domain *d)
>  {
> -    return IS_ENABLED(CONFIG_HVM) && (d->options & XEN_DOMCTL_CDF_nested_virt);
> +    return IS_ENABLED(CONFIG_NESTED_VIRT) &&
> +           (d->options & XEN_DOMCTL_CDF_nested_virt);
>  }
>  
> +/* Nested paging */
> +#define NESTEDHVM_PAGEFAULT_DONE       0
> +#define NESTEDHVM_PAGEFAULT_INJECT     1
> +#define NESTEDHVM_PAGEFAULT_L1_ERROR   2
> +#define NESTEDHVM_PAGEFAULT_L0_ERROR   3
> +#define NESTEDHVM_PAGEFAULT_MMIO       4
> +#define NESTEDHVM_PAGEFAULT_RETRY      5
> +#define NESTEDHVM_PAGEFAULT_DIRECT_MMIO 6
> +
> +#ifdef CONFIG_NESTED_VIRT
> +
>  /* Nested VCPU */
>  int nestedhvm_vcpu_initialise(struct vcpu *v);
>  void nestedhvm_vcpu_destroy(struct vcpu *v);
> @@ -38,14 +50,6 @@ bool nestedhvm_vcpu_in_guestmode(struct vcpu *v);
>  #define nestedhvm_vcpu_exit_guestmode(v)  \
>      vcpu_nestedhvm(v).nv_guestmode = 0
>  
> -/* Nested paging */
> -#define NESTEDHVM_PAGEFAULT_DONE       0
> -#define NESTEDHVM_PAGEFAULT_INJECT     1
> -#define NESTEDHVM_PAGEFAULT_L1_ERROR   2
> -#define NESTEDHVM_PAGEFAULT_L0_ERROR   3
> -#define NESTEDHVM_PAGEFAULT_MMIO       4
> -#define NESTEDHVM_PAGEFAULT_RETRY      5
> -#define NESTEDHVM_PAGEFAULT_DIRECT_MMIO 6
>  int nestedhvm_hap_nested_page_fault(struct vcpu *v, paddr_t *L2_gpa,
>                                      struct npfec npfec);
>  
> @@ -59,6 +63,25 @@ unsigned long *nestedhvm_vcpu_iomap_get(bool ioport_80, bool ioport_ed);
>  
>  void nestedhvm_vmcx_flushtlb(struct p2m_domain *p2m);
>  
> +#else /* !CONFIG_NESTED_VIRT */
> +
> +static inline int nestedhvm_vcpu_initialise(struct vcpu *v) { return 0; }
> +static inline void nestedhvm_vcpu_destroy(struct vcpu *v) { }
> +static inline void nestedhvm_vcpu_reset(struct vcpu *v) { }
> +static inline bool nestedhvm_vcpu_in_guestmode(struct vcpu *v) { return false; }
> +static inline int nestedhvm_hap_nested_page_fault(struct vcpu *v, paddr_t *L2_gpa,
> +                                                  struct npfec npfec)
> +{
> +    return NESTEDHVM_PAGEFAULT_L0_ERROR;
> +}
> +#define nestedhvm_vcpu_enter_guestmode(v) do { } while (0)
> +#define nestedhvm_vcpu_exit_guestmode(v)  do { } while (0)
> +#define nestedhvm_paging_mode_hap(v) false
> +#define nestedhvm_vmswitch_in_progress(v) false
> +static inline void nestedhvm_vmcx_flushtlb(struct p2m_domain *p2m) { }
> +
> +#endif /* CONFIG_NESTED_VIRT */
> +
>  static inline bool nestedhvm_is_n2(struct vcpu *v)
>  {
>      if ( !nestedhvm_enabled(v->domain) ||
> diff --git a/xen/arch/x86/include/asm/hvm/vmx/vvmx.h b/xen/arch/x86/include/asm/hvm/vmx/vvmx.h
> index da10d3fa96..8dc876a4c2 100644
> --- a/xen/arch/x86/include/asm/hvm/vmx/vvmx.h
> +++ b/xen/arch/x86/include/asm/hvm/vmx/vvmx.h
> @@ -73,6 +73,8 @@ union vmx_inst_info {
>      u32 word;
>  };
>  
> +#ifdef CONFIG_NESTED_VIRT
> +
>  int cf_check nvmx_vcpu_initialise(struct vcpu *v);
>  void cf_check nvmx_vcpu_destroy(struct vcpu *v);
>  int cf_check nvmx_vcpu_reset(struct vcpu *v);
> @@ -199,5 +201,33 @@ int nept_translate_l2ga(struct vcpu *v, paddr_t l2ga,
>                          uint64_t *exit_qual, uint32_t *exit_reason);
>  int nvmx_cpu_up_prepare(unsigned int cpu);
>  void nvmx_cpu_dead(unsigned int cpu);
> +
> +#else /* !CONFIG_NESTED_VIRT */
> +
> +static inline void nvmx_update_exec_control(struct vcpu *v, u32 value) { }
> +static inline void nvmx_update_secondary_exec_control(struct vcpu *v,
> +                                                      unsigned long value) { }
> +static inline void nvmx_update_exception_bitmap(struct vcpu *v,
> +                                                unsigned long value) { }
> +static inline u64 nvmx_get_tsc_offset(struct vcpu *v) { return 0; }
> +static inline void nvmx_set_cr_read_shadow(struct vcpu *v, unsigned int cr) { }
> +static inline bool nvmx_intercepts_exception(struct vcpu *v, unsigned int vector,
> +                                             int error_code) { return false; }
> +static inline int nvmx_n2_vmexit_handler(struct cpu_user_regs *regs,
> +                                         unsigned int exit_reason) { return 0; }
> +static inline void nvmx_idtv_handling(void) { }
> +static inline int nvmx_msr_read_intercept(unsigned int msr, u64 *msr_content)
> +{
> +    return 0;
> +}
> +static inline int nvmx_handle_vmx_insn(struct cpu_user_regs *regs,
> +                                       unsigned int exit_reason) { return 0; }
> +static inline int nvmx_cpu_up_prepare(unsigned int cpu) { return 0; }
> +static inline void nvmx_cpu_dead(unsigned int cpu) { }
> +
> +#define get_vvmcs(vcpu, encoding) 0
> +
> +#endif /* CONFIG_NESTED_VIRT */
> +
>  #endif /* __ASM_X86_HVM_VVMX_H__ */
>  
> diff --git a/xen/arch/x86/mm/Makefile b/xen/arch/x86/mm/Makefile
> index 960f6e8409..aa15811c2e 100644
> --- a/xen/arch/x86/mm/Makefile
> +++ b/xen/arch/x86/mm/Makefile
> @@ -7,7 +7,7 @@ obj-$(CONFIG_SHADOW_PAGING) += guest_walk_4.o
>  obj-$(CONFIG_VM_EVENT) += mem_access.o
>  obj-$(CONFIG_MEM_PAGING) += mem_paging.o
>  obj-$(CONFIG_MEM_SHARING) += mem_sharing.o
> -obj-$(CONFIG_HVM) += nested.o
> +obj-$(CONFIG_NESTED_VIRT) += nested.o
>  obj-$(CONFIG_HVM) += p2m.o
>  obj-y += p2m-basic.o
>  obj-$(CONFIG_INTEL_VMX) += p2m-ept.o
> diff --git a/xen/arch/x86/mm/hap/Makefile b/xen/arch/x86/mm/hap/Makefile
> index 67c29b2162..de1bb3abde 100644
> --- a/xen/arch/x86/mm/hap/Makefile
> +++ b/xen/arch/x86/mm/hap/Makefile
> @@ -2,5 +2,5 @@ obj-y += hap.o
>  obj-y += guest_walk_2.o
>  obj-y += guest_walk_3.o
>  obj-y += guest_walk_4.o
> -obj-y += nested_hap.o
> -obj-$(CONFIG_INTEL_VMX) += nested_ept.o
> +obj-$(CONFIG_NESTED_VIRT) += nested_hap.o
> +obj-$(CONFIG_NESTED_VIRT) += nested_ept.o
> diff --git a/xen/arch/x86/mm/p2m.h b/xen/arch/x86/mm/p2m.h
> index 635f5a7f45..fa14e69fff 100644
> --- a/xen/arch/x86/mm/p2m.h
> +++ b/xen/arch/x86/mm/p2m.h
> @@ -25,9 +25,15 @@ void p2m_teardown_altp2m(struct domain *d);
>  void p2m_flush_table_locked(struct p2m_domain *p2m);
>  int __must_check p2m_remove_entry(struct p2m_domain *p2m, gfn_t gfn, mfn_t mfn,
>                                    unsigned int page_order);
> +#ifdef CONFIG_NESTED_VIRT
>  void p2m_nestedp2m_init(struct p2m_domain *p2m);
>  int p2m_init_nestedp2m(struct domain *d);
>  void p2m_teardown_nestedp2m(struct domain *d);
> +#else
> +static inline void p2m_nestedp2m_init(struct p2m_domain *p2m) {}
> +static inline int p2m_init_nestedp2m(struct domain *d) { return 0; }
> +static inline void p2m_teardown_nestedp2m(struct domain *d) {}
> +#endif
>  
>  int ept_p2m_init(struct p2m_domain *p2m);
>  void ept_p2m_uninit(struct p2m_domain *p2m);


-- 
Sincerely,
Demi Marie Obenour (she/her/hers)

[-- Attachment #1.1.2: OpenPGP public key --]
[-- Type: application/pgp-keys, Size: 7253 bytes --]

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] x86/hvm: Add Kconfig option to disable nested virtualization
  2026-02-06  1:50 [PATCH] x86/hvm: Add Kconfig option to disable nested virtualization Stefano Stabellini
  2026-02-06  4:07 ` Demi Marie Obenour
@ 2026-02-06  8:15 ` Roger Pau Monné
  2026-02-06 20:52   ` Stefano Stabellini
  1 sibling, 1 reply; 10+ messages in thread
From: Roger Pau Monné @ 2026-02-06  8:15 UTC (permalink / raw)
  To: Stefano Stabellini
  Cc: xen-devel, jbeulich, andrew.cooper3, jason.andryuk,
	alejandro.garciavallejo

On Thu, Feb 05, 2026 at 05:50:32PM -0800, Stefano Stabellini wrote:
> Introduce CONFIG_NESTED_VIRT (default y, requires EXPERT to disable)
> to allow nested virtualization support to be disabled at build time.
> This is useful for embedded or safety-focused deployments where
> nested virtualization is not needed, reducing code size and attack
> surface.
> 
> When CONFIG_NESTED_VIRT=n, the following source files are excluded:
> - arch/x86/hvm/nestedhvm.c
> - arch/x86/hvm/svm/nestedsvm.c
> - arch/x86/hvm/vmx/vvmx.c
> - arch/x86/mm/nested.c
> - arch/x86/mm/hap/nested_hap.c
> - arch/x86/mm/hap/nested_ept.c
> 
> Add inline stubs where needed in headers.
> 
> No functional change when CONFIG_NESTED_VIRT=y.

You also need to adjust arch_sanitise_domain_config() so it refuses to
create domains with the XEN_DOMCTL_CDF_nested_virt flag set when
CONFIG_NESTED_VIRT=n.  If you do that I think a bunch of the dummy
helpers that you add when CONFIG_NESTED_VIRT=n should also gain an
ASSERT_UNREACHABLE().

And IMO you will also need to add a new XEN_SYSCTL_PHYSCAP_nestedhvm
(or alike) to signal the toolstack whether the nested HVM feature is
available.  Much like we do for HAP/Shadow/gnttab availability.

> Signed-off-by: Stefano Stabellini <stefano.stabellini@amd.com>
> ---
>  xen/arch/x86/hvm/Kconfig                 | 10 ++++++
>  xen/arch/x86/hvm/Makefile                |  2 +-
>  xen/arch/x86/hvm/svm/Makefile            |  2 +-
>  xen/arch/x86/hvm/svm/nestedhvm.h         | 44 +++++++++++++++++++++---
>  xen/arch/x86/hvm/svm/svm.c               |  6 ++++
>  xen/arch/x86/hvm/vmx/Makefile            |  2 +-
>  xen/arch/x86/hvm/vmx/vmx.c               | 10 ++++--
>  xen/arch/x86/include/asm/hvm/nestedhvm.h | 41 +++++++++++++++++-----
>  xen/arch/x86/include/asm/hvm/vmx/vvmx.h  | 30 ++++++++++++++++
>  xen/arch/x86/mm/Makefile                 |  2 +-
>  xen/arch/x86/mm/hap/Makefile             |  4 +--
>  xen/arch/x86/mm/p2m.h                    |  6 ++++
>  12 files changed, 137 insertions(+), 22 deletions(-)
> 
> diff --git a/xen/arch/x86/hvm/Kconfig b/xen/arch/x86/hvm/Kconfig
> index f32bf5cbb7..12b5df4710 100644
> --- a/xen/arch/x86/hvm/Kconfig
> +++ b/xen/arch/x86/hvm/Kconfig
> @@ -92,4 +92,14 @@ config MEM_SHARING
>  	bool "Xen memory sharing support (UNSUPPORTED)" if UNSUPPORTED
>  	depends on INTEL_VMX
>  
> +config NESTED_VIRT
> +	bool "Nested virtualization support" if EXPERT
> +	depends on AMD_SVM || INTEL_VMX
> +	default y
> +	help
> +	  Enable nested virtualization, allowing guests to run their own
> +	  hypervisors. This requires hardware support.
> +
> +	  If unsure, say Y.

If we go that route, I think nested virt should become off by default.
It's not security supported, and known to be broken in many areas.

I'm also unsure about whether this wants to be gated under EXPERT.
But I'm not sure I'm any good at knowing whether something should be
under EXPERT or not.

> +
>  endif
> diff --git a/xen/arch/x86/hvm/Makefile b/xen/arch/x86/hvm/Makefile
> index f34fb03934..b8a0a68624 100644
> --- a/xen/arch/x86/hvm/Makefile
> +++ b/xen/arch/x86/hvm/Makefile
> @@ -18,7 +18,7 @@ obj-y += irq.o
>  obj-y += mmio.o
>  obj-$(CONFIG_VM_EVENT) += monitor.o
>  obj-y += mtrr.o
> -obj-y += nestedhvm.o
> +obj-$(CONFIG_NESTED_VIRT) += nestedhvm.o
>  obj-y += pmtimer.o
>  obj-y += quirks.o
>  obj-y += rtc.o
> diff --git a/xen/arch/x86/hvm/svm/Makefile b/xen/arch/x86/hvm/svm/Makefile
> index 8a072cafd5..92418e3444 100644
> --- a/xen/arch/x86/hvm/svm/Makefile
> +++ b/xen/arch/x86/hvm/svm/Makefile
> @@ -2,6 +2,6 @@ obj-y += asid.o
>  obj-y += emulate.o
>  obj-bin-y += entry.o
>  obj-y += intr.o
> -obj-y += nestedsvm.o
> +obj-$(CONFIG_NESTED_VIRT) += nestedsvm.o
>  obj-y += svm.o
>  obj-y += vmcb.o
> diff --git a/xen/arch/x86/hvm/svm/nestedhvm.h b/xen/arch/x86/hvm/svm/nestedhvm.h
> index 9bfed5ffd7..a102c076ea 100644
> --- a/xen/arch/x86/hvm/svm/nestedhvm.h
> +++ b/xen/arch/x86/hvm/svm/nestedhvm.h
> @@ -26,6 +26,13 @@
>  #define nsvm_efer_svm_enabled(v) \
>      (!!((v)->arch.hvm.guest_efer & EFER_SVME))
>  
> +#define NSVM_INTR_NOTHANDLED     3
> +#define NSVM_INTR_NOTINTERCEPTED 2
> +#define NSVM_INTR_FORCEVMEXIT    1
> +#define NSVM_INTR_MASKED         0
> +
> +#ifdef CONFIG_NESTED_VIRT
> +
>  int nestedsvm_vmcb_map(struct vcpu *v, uint64_t vmcbaddr);
>  void nestedsvm_vmexit_defer(struct vcpu *v,
>      uint64_t exitcode, uint64_t exitinfo1, uint64_t exitinfo2);
> @@ -57,13 +64,40 @@ int cf_check nsvm_hap_walk_L1_p2m(
>      struct vcpu *v, paddr_t L2_gpa, paddr_t *L1_gpa, unsigned int *page_order,
>      uint8_t *p2m_acc, struct npfec npfec);
>  
> -#define NSVM_INTR_NOTHANDLED     3
> -#define NSVM_INTR_NOTINTERCEPTED 2
> -#define NSVM_INTR_FORCEVMEXIT    1
> -#define NSVM_INTR_MASKED         0
> -
>  int nestedsvm_vcpu_interrupt(struct vcpu *v, const struct hvm_intack intack);
>  
> +#else /* !CONFIG_NESTED_VIRT */
> +
> +static inline int nestedsvm_vmcb_map(struct vcpu *v, uint64_t vmcbaddr)
> +{
> +    return 0;
> +}
> +static inline void nestedsvm_vmexit_defer(struct vcpu *v,
> +    uint64_t exitcode, uint64_t exitinfo1, uint64_t exitinfo2) { }
> +static inline enum nestedhvm_vmexits nestedsvm_vmexit_n2n1(struct vcpu *v,
> +    struct cpu_user_regs *regs)
> +{
> +    return NESTEDHVM_VMEXIT_ERROR;
> +}
> +static inline enum nestedhvm_vmexits nestedsvm_check_intercepts(struct vcpu *v,
> +    struct cpu_user_regs *regs, uint64_t exitcode)
> +{
> +    return NESTEDHVM_VMEXIT_ERROR;
> +}
> +static inline void svm_nested_features_on_efer_update(struct vcpu *v) { }
> +static inline void svm_vmexit_do_clgi(struct cpu_user_regs *regs,
> +                                      struct vcpu *v) { }
> +static inline void svm_vmexit_do_stgi(struct cpu_user_regs *regs,
> +                                       struct vcpu *v) { }
> +static inline bool nestedsvm_gif_isset(struct vcpu *v) { return true; }
> +static inline int nestedsvm_vcpu_interrupt(struct vcpu *v,
> +                                           const struct hvm_intack intack)
> +{
> +    return NSVM_INTR_NOTINTERCEPTED;
> +}
> +
> +#endif /* CONFIG_NESTED_VIRT */
> +
>  #endif /* __X86_HVM_SVM_NESTEDHVM_PRIV_H__ */
>  
>  /*
> diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
> index 18ba837738..0234b57afb 100644
> --- a/xen/arch/x86/hvm/svm/svm.c
> +++ b/xen/arch/x86/hvm/svm/svm.c
> @@ -46,6 +46,10 @@
>  
>  void noreturn svm_asm_do_resume(void);
>  
> +#ifndef CONFIG_NESTED_VIRT
> +void asmlinkage nsvm_vcpu_switch(void) { }
> +#endif
> +
>  u32 svm_feature_flags;
>  
>  /*
> @@ -2465,6 +2469,7 @@ static struct hvm_function_table __initdata_cf_clobber svm_function_table = {
>      .set_rdtsc_exiting    = svm_set_rdtsc_exiting,
>      .get_insn_bytes       = svm_get_insn_bytes,
>  
> +#ifdef CONFIG_NESTED_VIRT
>      .nhvm_vcpu_initialise = nsvm_vcpu_initialise,
>      .nhvm_vcpu_destroy = nsvm_vcpu_destroy,
>      .nhvm_vcpu_reset = nsvm_vcpu_reset,
> @@ -2474,6 +2479,7 @@ static struct hvm_function_table __initdata_cf_clobber svm_function_table = {
>      .nhvm_vmcx_hap_enabled = nsvm_vmcb_hap_enabled,
>      .nhvm_intr_blocked = nsvm_intr_blocked,
>      .nhvm_hap_walk_L1_p2m = nsvm_hap_walk_L1_p2m,
> +#endif
>  
>      .get_reg = svm_get_reg,
>      .set_reg = svm_set_reg,
> diff --git a/xen/arch/x86/hvm/vmx/Makefile b/xen/arch/x86/hvm/vmx/Makefile
> index 04a29ce59d..902564b3e2 100644
> --- a/xen/arch/x86/hvm/vmx/Makefile
> +++ b/xen/arch/x86/hvm/vmx/Makefile
> @@ -3,4 +3,4 @@ obj-y += intr.o
>  obj-y += realmode.o
>  obj-y += vmcs.o
>  obj-y += vmx.o
> -obj-y += vvmx.o
> +obj-$(CONFIG_NESTED_VIRT) += vvmx.o
> diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
> index 82c55f49ae..252f27322b 100644
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -55,6 +55,10 @@
>  #include <public/hvm/save.h>
>  #include <public/sched.h>
>  
> +#ifndef CONFIG_NESTED_VIRT
> +void asmlinkage nvmx_switch_guest(void) { }
> +#endif
> +
>  static bool __initdata opt_force_ept;
>  boolean_param("force-ept", opt_force_ept);
>  
> @@ -2033,7 +2037,7 @@ static void nvmx_enqueue_n2_exceptions(struct vcpu *v,
>                   nvmx->intr.intr_info, nvmx->intr.error_code);
>  }
>  
> -static int cf_check nvmx_vmexit_event(
> +static int cf_check __maybe_unused nvmx_vmexit_event(
>      struct vcpu *v, const struct x86_event *event)
>  {
>      nvmx_enqueue_n2_exceptions(v, event->vector, event->error_code,
> @@ -2933,6 +2937,7 @@ static struct hvm_function_table __initdata_cf_clobber vmx_function_table = {
>      .handle_cd            = vmx_handle_cd,
>      .set_info_guest       = vmx_set_info_guest,
>      .set_rdtsc_exiting    = vmx_set_rdtsc_exiting,
> +#ifdef CONFIG_NESTED_VIRT
>      .nhvm_vcpu_initialise = nvmx_vcpu_initialise,
>      .nhvm_vcpu_destroy    = nvmx_vcpu_destroy,
>      .nhvm_vcpu_reset      = nvmx_vcpu_reset,
> @@ -2942,8 +2947,9 @@ static struct hvm_function_table __initdata_cf_clobber vmx_function_table = {
>      .nhvm_vcpu_vmexit_event = nvmx_vmexit_event,
>      .nhvm_intr_blocked    = nvmx_intr_blocked,
>      .nhvm_domain_relinquish_resources = nvmx_domain_relinquish_resources,
> -    .update_vlapic_mode = vmx_vlapic_msr_changed,
>      .nhvm_hap_walk_L1_p2m = nvmx_hap_walk_L1_p2m,
> +#endif
> +    .update_vlapic_mode = vmx_vlapic_msr_changed,
>  #ifdef CONFIG_VM_EVENT
>      .enable_msr_interception = vmx_enable_msr_interception,
>  #endif
> diff --git a/xen/arch/x86/include/asm/hvm/nestedhvm.h b/xen/arch/x86/include/asm/hvm/nestedhvm.h
> index ea2c1bc328..0372974b24 100644
> --- a/xen/arch/x86/include/asm/hvm/nestedhvm.h
> +++ b/xen/arch/x86/include/asm/hvm/nestedhvm.h
> @@ -25,9 +25,21 @@ enum nestedhvm_vmexits {
>  /* Nested HVM on/off per domain */
>  static inline bool nestedhvm_enabled(const struct domain *d)
>  {
> -    return IS_ENABLED(CONFIG_HVM) && (d->options & XEN_DOMCTL_CDF_nested_virt);
> +    return IS_ENABLED(CONFIG_NESTED_VIRT) &&
> +           (d->options & XEN_DOMCTL_CDF_nested_virt);
>  }
>  
> +/* Nested paging */
> +#define NESTEDHVM_PAGEFAULT_DONE       0
> +#define NESTEDHVM_PAGEFAULT_INJECT     1
> +#define NESTEDHVM_PAGEFAULT_L1_ERROR   2
> +#define NESTEDHVM_PAGEFAULT_L0_ERROR   3
> +#define NESTEDHVM_PAGEFAULT_MMIO       4
> +#define NESTEDHVM_PAGEFAULT_RETRY      5
> +#define NESTEDHVM_PAGEFAULT_DIRECT_MMIO 6
> +
> +#ifdef CONFIG_NESTED_VIRT
> +
>  /* Nested VCPU */
>  int nestedhvm_vcpu_initialise(struct vcpu *v);
>  void nestedhvm_vcpu_destroy(struct vcpu *v);
> @@ -38,14 +50,6 @@ bool nestedhvm_vcpu_in_guestmode(struct vcpu *v);
>  #define nestedhvm_vcpu_exit_guestmode(v)  \
>      vcpu_nestedhvm(v).nv_guestmode = 0
>  
> -/* Nested paging */
> -#define NESTEDHVM_PAGEFAULT_DONE       0
> -#define NESTEDHVM_PAGEFAULT_INJECT     1
> -#define NESTEDHVM_PAGEFAULT_L1_ERROR   2
> -#define NESTEDHVM_PAGEFAULT_L0_ERROR   3
> -#define NESTEDHVM_PAGEFAULT_MMIO       4
> -#define NESTEDHVM_PAGEFAULT_RETRY      5
> -#define NESTEDHVM_PAGEFAULT_DIRECT_MMIO 6
>  int nestedhvm_hap_nested_page_fault(struct vcpu *v, paddr_t *L2_gpa,
>                                      struct npfec npfec);
>  
> @@ -59,6 +63,25 @@ unsigned long *nestedhvm_vcpu_iomap_get(bool ioport_80, bool ioport_ed);
>  
>  void nestedhvm_vmcx_flushtlb(struct p2m_domain *p2m);
>  
> +#else /* !CONFIG_NESTED_VIRT */
> +
> +static inline int nestedhvm_vcpu_initialise(struct vcpu *v) { return 0; }
> +static inline void nestedhvm_vcpu_destroy(struct vcpu *v) { }
> +static inline void nestedhvm_vcpu_reset(struct vcpu *v) { }
> +static inline bool nestedhvm_vcpu_in_guestmode(struct vcpu *v) { return false; }
> +static inline int nestedhvm_hap_nested_page_fault(struct vcpu *v, paddr_t *L2_gpa,
> +                                                  struct npfec npfec)
> +{
> +    return NESTEDHVM_PAGEFAULT_L0_ERROR;
> +}
> +#define nestedhvm_vcpu_enter_guestmode(v) do { } while (0)
> +#define nestedhvm_vcpu_exit_guestmode(v)  do { } while (0)
> +#define nestedhvm_paging_mode_hap(v) false
> +#define nestedhvm_vmswitch_in_progress(v) false
> +static inline void nestedhvm_vmcx_flushtlb(struct p2m_domain *p2m) { }
> +
> +#endif /* CONFIG_NESTED_VIRT */
> +
>  static inline bool nestedhvm_is_n2(struct vcpu *v)
>  {
>      if ( !nestedhvm_enabled(v->domain) ||
> diff --git a/xen/arch/x86/include/asm/hvm/vmx/vvmx.h b/xen/arch/x86/include/asm/hvm/vmx/vvmx.h
> index da10d3fa96..8dc876a4c2 100644
> --- a/xen/arch/x86/include/asm/hvm/vmx/vvmx.h
> +++ b/xen/arch/x86/include/asm/hvm/vmx/vvmx.h
> @@ -73,6 +73,8 @@ union vmx_inst_info {
>      u32 word;
>  };
>  
> +#ifdef CONFIG_NESTED_VIRT
> +
>  int cf_check nvmx_vcpu_initialise(struct vcpu *v);
>  void cf_check nvmx_vcpu_destroy(struct vcpu *v);
>  int cf_check nvmx_vcpu_reset(struct vcpu *v);
> @@ -199,5 +201,33 @@ int nept_translate_l2ga(struct vcpu *v, paddr_t l2ga,
>                          uint64_t *exit_qual, uint32_t *exit_reason);
>  int nvmx_cpu_up_prepare(unsigned int cpu);
>  void nvmx_cpu_dead(unsigned int cpu);
> +
> +#else /* !CONFIG_NESTED_VIRT */
> +
> +static inline void nvmx_update_exec_control(struct vcpu *v, u32 value) { }
> +static inline void nvmx_update_secondary_exec_control(struct vcpu *v,
> +                                                      unsigned long value) { }
> +static inline void nvmx_update_exception_bitmap(struct vcpu *v,
> +                                                unsigned long value) { }
> +static inline u64 nvmx_get_tsc_offset(struct vcpu *v) { return 0; }
> +static inline void nvmx_set_cr_read_shadow(struct vcpu *v, unsigned int cr) { }
> +static inline bool nvmx_intercepts_exception(struct vcpu *v, unsigned int vector,
> +                                             int error_code) { return false; }
> +static inline int nvmx_n2_vmexit_handler(struct cpu_user_regs *regs,
> +                                         unsigned int exit_reason) { return 0; }
> +static inline void nvmx_idtv_handling(void) { }
> +static inline int nvmx_msr_read_intercept(unsigned int msr, u64 *msr_content)
> +{
> +    return 0;
> +}
> +static inline int nvmx_handle_vmx_insn(struct cpu_user_regs *regs,
> +                                       unsigned int exit_reason) { return 0; }
> +static inline int nvmx_cpu_up_prepare(unsigned int cpu) { return 0; }
> +static inline void nvmx_cpu_dead(unsigned int cpu) { }
> +
> +#define get_vvmcs(vcpu, encoding) 0
> +
> +#endif /* CONFIG_NESTED_VIRT */
> +
>  #endif /* __ASM_X86_HVM_VVMX_H__ */
>  
> diff --git a/xen/arch/x86/mm/Makefile b/xen/arch/x86/mm/Makefile
> index 960f6e8409..aa15811c2e 100644
> --- a/xen/arch/x86/mm/Makefile
> +++ b/xen/arch/x86/mm/Makefile
> @@ -7,7 +7,7 @@ obj-$(CONFIG_SHADOW_PAGING) += guest_walk_4.o
>  obj-$(CONFIG_VM_EVENT) += mem_access.o
>  obj-$(CONFIG_MEM_PAGING) += mem_paging.o
>  obj-$(CONFIG_MEM_SHARING) += mem_sharing.o
> -obj-$(CONFIG_HVM) += nested.o
> +obj-$(CONFIG_NESTED_VIRT) += nested.o
>  obj-$(CONFIG_HVM) += p2m.o
>  obj-y += p2m-basic.o
>  obj-$(CONFIG_INTEL_VMX) += p2m-ept.o
> diff --git a/xen/arch/x86/mm/hap/Makefile b/xen/arch/x86/mm/hap/Makefile
> index 67c29b2162..de1bb3abde 100644
> --- a/xen/arch/x86/mm/hap/Makefile
> +++ b/xen/arch/x86/mm/hap/Makefile
> @@ -2,5 +2,5 @@ obj-y += hap.o
>  obj-y += guest_walk_2.o
>  obj-y += guest_walk_3.o
>  obj-y += guest_walk_4.o
> -obj-y += nested_hap.o
> -obj-$(CONFIG_INTEL_VMX) += nested_ept.o
> +obj-$(CONFIG_NESTED_VIRT) += nested_hap.o
> +obj-$(CONFIG_NESTED_VIRT) += nested_ept.o

With this change nested_ept.o is no longer gated explicitly on
CONFIG_INTEL_VMX, which could cause build issues if you have a Kconfig
like:

CONFIG_INTEL_VMX=n
CONFIG_AMD_SVM=y
CONFIG_NESTED_VIRT=y

Does the code in nested_ept.o have dependencies on other files gated
by CONFIG_INTEL_VMX, and hence would fail at the linking stage?  And
even if it builds, the code in nested_ept.o would be unreachable I
expect.

Thanks, Roger.


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] x86/hvm: Add Kconfig option to disable nested virtualization
  2026-02-06  8:15 ` Roger Pau Monné
@ 2026-02-06 20:52   ` Stefano Stabellini
  0 siblings, 0 replies; 10+ messages in thread
From: Stefano Stabellini @ 2026-02-06 20:52 UTC (permalink / raw)
  To: Roger Pau Monné
  Cc: Stefano Stabellini, xen-devel, jbeulich, andrew.cooper3,
	jason.andryuk, alejandro.garciavallejo

[-- Attachment #1: Type: text/plain, Size: 17025 bytes --]

On Fri, 5 Feb 2026, Roger Pau Monné wrote:
> On Thu, Feb 05, 2026 at 05:50:32PM -0800, Stefano Stabellini wrote:
> > Introduce CONFIG_NESTED_VIRT (default y, requires EXPERT to disable)
> > to allow nested virtualization support to be disabled at build time.
> > This is useful for embedded or safety-focused deployments where
> > nested virtualization is not needed, reducing code size and attack
> > surface.
> > 
> > When CONFIG_NESTED_VIRT=n, the following source files are excluded:
> > - arch/x86/hvm/nestedhvm.c
> > - arch/x86/hvm/svm/nestedsvm.c
> > - arch/x86/hvm/vmx/vvmx.c
> > - arch/x86/mm/nested.c
> > - arch/x86/mm/hap/nested_hap.c
> > - arch/x86/mm/hap/nested_ept.c
> > 
> > Add inline stubs where needed in headers.
> > 
> > No functional change when CONFIG_NESTED_VIRT=y.
> 
> You also need to adjust arch_sanitise_domain_config() so it refuses to
> create domains with the XEN_DOMCTL_CDF_nested_virt flag set when
> CONFIG_NESTED_VIRT=n. 

Sounds good


> If you do that I think a bunch of the dummy
> helpers that you add when CONFIG_NESTED_VIRT=n should also gain an
> ASSERT_UNREACHABLE().

OK


> And IMO you will also need to add a new XEN_SYSCTL_PHYSCAP_nestedhvm
> (or alike) to signal the toolstack whether the nested HVM feature is
> available.  Much like we do for HAP/Shadow/gnttab availability.

yeah good point


> > Signed-off-by: Stefano Stabellini <stefano.stabellini@amd.com>
> > ---
> >  xen/arch/x86/hvm/Kconfig                 | 10 ++++++
> >  xen/arch/x86/hvm/Makefile                |  2 +-
> >  xen/arch/x86/hvm/svm/Makefile            |  2 +-
> >  xen/arch/x86/hvm/svm/nestedhvm.h         | 44 +++++++++++++++++++++---
> >  xen/arch/x86/hvm/svm/svm.c               |  6 ++++
> >  xen/arch/x86/hvm/vmx/Makefile            |  2 +-
> >  xen/arch/x86/hvm/vmx/vmx.c               | 10 ++++--
> >  xen/arch/x86/include/asm/hvm/nestedhvm.h | 41 +++++++++++++++++-----
> >  xen/arch/x86/include/asm/hvm/vmx/vvmx.h  | 30 ++++++++++++++++
> >  xen/arch/x86/mm/Makefile                 |  2 +-
> >  xen/arch/x86/mm/hap/Makefile             |  4 +--
> >  xen/arch/x86/mm/p2m.h                    |  6 ++++
> >  12 files changed, 137 insertions(+), 22 deletions(-)
> > 
> > diff --git a/xen/arch/x86/hvm/Kconfig b/xen/arch/x86/hvm/Kconfig
> > index f32bf5cbb7..12b5df4710 100644
> > --- a/xen/arch/x86/hvm/Kconfig
> > +++ b/xen/arch/x86/hvm/Kconfig
> > @@ -92,4 +92,14 @@ config MEM_SHARING
> >  	bool "Xen memory sharing support (UNSUPPORTED)" if UNSUPPORTED
> >  	depends on INTEL_VMX
> >  
> > +config NESTED_VIRT
> > +	bool "Nested virtualization support" if EXPERT
> > +	depends on AMD_SVM || INTEL_VMX
> > +	default y
> > +	help
> > +	  Enable nested virtualization, allowing guests to run their own
> > +	  hypervisors. This requires hardware support.
> > +
> > +	  If unsure, say Y.
> 
> If we go that route, I think nested virt should become off by default.
> It's not security supported, and known to be broken in many areas.
> 
> I'm also unsure about whether this wants to be gated under EXPERT.
> But I'm not sure I'm any good at knowing whether something should be
> under EXPERT or not.

I am happy either way and I'll let others decide on the default. I did it
this way to avoid changes over the current baseline. In case there is
disagreement, I am also happy if it gets changed on commit based on the
latest preference.


> > +
> >  endif
> > diff --git a/xen/arch/x86/hvm/Makefile b/xen/arch/x86/hvm/Makefile
> > index f34fb03934..b8a0a68624 100644
> > --- a/xen/arch/x86/hvm/Makefile
> > +++ b/xen/arch/x86/hvm/Makefile
> > @@ -18,7 +18,7 @@ obj-y += irq.o
> >  obj-y += mmio.o
> >  obj-$(CONFIG_VM_EVENT) += monitor.o
> >  obj-y += mtrr.o
> > -obj-y += nestedhvm.o
> > +obj-$(CONFIG_NESTED_VIRT) += nestedhvm.o
> >  obj-y += pmtimer.o
> >  obj-y += quirks.o
> >  obj-y += rtc.o
> > diff --git a/xen/arch/x86/hvm/svm/Makefile b/xen/arch/x86/hvm/svm/Makefile
> > index 8a072cafd5..92418e3444 100644
> > --- a/xen/arch/x86/hvm/svm/Makefile
> > +++ b/xen/arch/x86/hvm/svm/Makefile
> > @@ -2,6 +2,6 @@ obj-y += asid.o
> >  obj-y += emulate.o
> >  obj-bin-y += entry.o
> >  obj-y += intr.o
> > -obj-y += nestedsvm.o
> > +obj-$(CONFIG_NESTED_VIRT) += nestedsvm.o
> >  obj-y += svm.o
> >  obj-y += vmcb.o
> > diff --git a/xen/arch/x86/hvm/svm/nestedhvm.h b/xen/arch/x86/hvm/svm/nestedhvm.h
> > index 9bfed5ffd7..a102c076ea 100644
> > --- a/xen/arch/x86/hvm/svm/nestedhvm.h
> > +++ b/xen/arch/x86/hvm/svm/nestedhvm.h
> > @@ -26,6 +26,13 @@
> >  #define nsvm_efer_svm_enabled(v) \
> >      (!!((v)->arch.hvm.guest_efer & EFER_SVME))
> >  
> > +#define NSVM_INTR_NOTHANDLED     3
> > +#define NSVM_INTR_NOTINTERCEPTED 2
> > +#define NSVM_INTR_FORCEVMEXIT    1
> > +#define NSVM_INTR_MASKED         0
> > +
> > +#ifdef CONFIG_NESTED_VIRT
> > +
> >  int nestedsvm_vmcb_map(struct vcpu *v, uint64_t vmcbaddr);
> >  void nestedsvm_vmexit_defer(struct vcpu *v,
> >      uint64_t exitcode, uint64_t exitinfo1, uint64_t exitinfo2);
> > @@ -57,13 +64,40 @@ int cf_check nsvm_hap_walk_L1_p2m(
> >      struct vcpu *v, paddr_t L2_gpa, paddr_t *L1_gpa, unsigned int *page_order,
> >      uint8_t *p2m_acc, struct npfec npfec);
> >  
> > -#define NSVM_INTR_NOTHANDLED     3
> > -#define NSVM_INTR_NOTINTERCEPTED 2
> > -#define NSVM_INTR_FORCEVMEXIT    1
> > -#define NSVM_INTR_MASKED         0
> > -
> >  int nestedsvm_vcpu_interrupt(struct vcpu *v, const struct hvm_intack intack);
> >  
> > +#else /* !CONFIG_NESTED_VIRT */
> > +
> > +static inline int nestedsvm_vmcb_map(struct vcpu *v, uint64_t vmcbaddr)
> > +{
> > +    return 0;
> > +}
> > +static inline void nestedsvm_vmexit_defer(struct vcpu *v,
> > +    uint64_t exitcode, uint64_t exitinfo1, uint64_t exitinfo2) { }
> > +static inline enum nestedhvm_vmexits nestedsvm_vmexit_n2n1(struct vcpu *v,
> > +    struct cpu_user_regs *regs)
> > +{
> > +    return NESTEDHVM_VMEXIT_ERROR;
> > +}
> > +static inline enum nestedhvm_vmexits nestedsvm_check_intercepts(struct vcpu *v,
> > +    struct cpu_user_regs *regs, uint64_t exitcode)
> > +{
> > +    return NESTEDHVM_VMEXIT_ERROR;
> > +}
> > +static inline void svm_nested_features_on_efer_update(struct vcpu *v) { }
> > +static inline void svm_vmexit_do_clgi(struct cpu_user_regs *regs,
> > +                                      struct vcpu *v) { }
> > +static inline void svm_vmexit_do_stgi(struct cpu_user_regs *regs,
> > +                                       struct vcpu *v) { }
> > +static inline bool nestedsvm_gif_isset(struct vcpu *v) { return true; }
> > +static inline int nestedsvm_vcpu_interrupt(struct vcpu *v,
> > +                                           const struct hvm_intack intack)
> > +{
> > +    return NSVM_INTR_NOTINTERCEPTED;
> > +}
> > +
> > +#endif /* CONFIG_NESTED_VIRT */
> > +
> >  #endif /* __X86_HVM_SVM_NESTEDHVM_PRIV_H__ */
> >  
> >  /*
> > diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
> > index 18ba837738..0234b57afb 100644
> > --- a/xen/arch/x86/hvm/svm/svm.c
> > +++ b/xen/arch/x86/hvm/svm/svm.c
> > @@ -46,6 +46,10 @@
> >  
> >  void noreturn svm_asm_do_resume(void);
> >  
> > +#ifndef CONFIG_NESTED_VIRT
> > +void asmlinkage nsvm_vcpu_switch(void) { }
> > +#endif
> > +
> >  u32 svm_feature_flags;
> >  
> >  /*
> > @@ -2465,6 +2469,7 @@ static struct hvm_function_table __initdata_cf_clobber svm_function_table = {
> >      .set_rdtsc_exiting    = svm_set_rdtsc_exiting,
> >      .get_insn_bytes       = svm_get_insn_bytes,
> >  
> > +#ifdef CONFIG_NESTED_VIRT
> >      .nhvm_vcpu_initialise = nsvm_vcpu_initialise,
> >      .nhvm_vcpu_destroy = nsvm_vcpu_destroy,
> >      .nhvm_vcpu_reset = nsvm_vcpu_reset,
> > @@ -2474,6 +2479,7 @@ static struct hvm_function_table __initdata_cf_clobber svm_function_table = {
> >      .nhvm_vmcx_hap_enabled = nsvm_vmcb_hap_enabled,
> >      .nhvm_intr_blocked = nsvm_intr_blocked,
> >      .nhvm_hap_walk_L1_p2m = nsvm_hap_walk_L1_p2m,
> > +#endif
> >  
> >      .get_reg = svm_get_reg,
> >      .set_reg = svm_set_reg,
> > diff --git a/xen/arch/x86/hvm/vmx/Makefile b/xen/arch/x86/hvm/vmx/Makefile
> > index 04a29ce59d..902564b3e2 100644
> > --- a/xen/arch/x86/hvm/vmx/Makefile
> > +++ b/xen/arch/x86/hvm/vmx/Makefile
> > @@ -3,4 +3,4 @@ obj-y += intr.o
> >  obj-y += realmode.o
> >  obj-y += vmcs.o
> >  obj-y += vmx.o
> > -obj-y += vvmx.o
> > +obj-$(CONFIG_NESTED_VIRT) += vvmx.o
> > diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
> > index 82c55f49ae..252f27322b 100644
> > --- a/xen/arch/x86/hvm/vmx/vmx.c
> > +++ b/xen/arch/x86/hvm/vmx/vmx.c
> > @@ -55,6 +55,10 @@
> >  #include <public/hvm/save.h>
> >  #include <public/sched.h>
> >  
> > +#ifndef CONFIG_NESTED_VIRT
> > +void asmlinkage nvmx_switch_guest(void) { }
> > +#endif
> > +
> >  static bool __initdata opt_force_ept;
> >  boolean_param("force-ept", opt_force_ept);
> >  
> > @@ -2033,7 +2037,7 @@ static void nvmx_enqueue_n2_exceptions(struct vcpu *v,
> >                   nvmx->intr.intr_info, nvmx->intr.error_code);
> >  }
> >  
> > -static int cf_check nvmx_vmexit_event(
> > +static int cf_check __maybe_unused nvmx_vmexit_event(
> >      struct vcpu *v, const struct x86_event *event)
> >  {
> >      nvmx_enqueue_n2_exceptions(v, event->vector, event->error_code,
> > @@ -2933,6 +2937,7 @@ static struct hvm_function_table __initdata_cf_clobber vmx_function_table = {
> >      .handle_cd            = vmx_handle_cd,
> >      .set_info_guest       = vmx_set_info_guest,
> >      .set_rdtsc_exiting    = vmx_set_rdtsc_exiting,
> > +#ifdef CONFIG_NESTED_VIRT
> >      .nhvm_vcpu_initialise = nvmx_vcpu_initialise,
> >      .nhvm_vcpu_destroy    = nvmx_vcpu_destroy,
> >      .nhvm_vcpu_reset      = nvmx_vcpu_reset,
> > @@ -2942,8 +2947,9 @@ static struct hvm_function_table __initdata_cf_clobber vmx_function_table = {
> >      .nhvm_vcpu_vmexit_event = nvmx_vmexit_event,
> >      .nhvm_intr_blocked    = nvmx_intr_blocked,
> >      .nhvm_domain_relinquish_resources = nvmx_domain_relinquish_resources,
> > -    .update_vlapic_mode = vmx_vlapic_msr_changed,
> >      .nhvm_hap_walk_L1_p2m = nvmx_hap_walk_L1_p2m,
> > +#endif
> > +    .update_vlapic_mode = vmx_vlapic_msr_changed,
> >  #ifdef CONFIG_VM_EVENT
> >      .enable_msr_interception = vmx_enable_msr_interception,
> >  #endif
> > diff --git a/xen/arch/x86/include/asm/hvm/nestedhvm.h b/xen/arch/x86/include/asm/hvm/nestedhvm.h
> > index ea2c1bc328..0372974b24 100644
> > --- a/xen/arch/x86/include/asm/hvm/nestedhvm.h
> > +++ b/xen/arch/x86/include/asm/hvm/nestedhvm.h
> > @@ -25,9 +25,21 @@ enum nestedhvm_vmexits {
> >  /* Nested HVM on/off per domain */
> >  static inline bool nestedhvm_enabled(const struct domain *d)
> >  {
> > -    return IS_ENABLED(CONFIG_HVM) && (d->options & XEN_DOMCTL_CDF_nested_virt);
> > +    return IS_ENABLED(CONFIG_NESTED_VIRT) &&
> > +           (d->options & XEN_DOMCTL_CDF_nested_virt);
> >  }
> >  
> > +/* Nested paging */
> > +#define NESTEDHVM_PAGEFAULT_DONE       0
> > +#define NESTEDHVM_PAGEFAULT_INJECT     1
> > +#define NESTEDHVM_PAGEFAULT_L1_ERROR   2
> > +#define NESTEDHVM_PAGEFAULT_L0_ERROR   3
> > +#define NESTEDHVM_PAGEFAULT_MMIO       4
> > +#define NESTEDHVM_PAGEFAULT_RETRY      5
> > +#define NESTEDHVM_PAGEFAULT_DIRECT_MMIO 6
> > +
> > +#ifdef CONFIG_NESTED_VIRT
> > +
> >  /* Nested VCPU */
> >  int nestedhvm_vcpu_initialise(struct vcpu *v);
> >  void nestedhvm_vcpu_destroy(struct vcpu *v);
> > @@ -38,14 +50,6 @@ bool nestedhvm_vcpu_in_guestmode(struct vcpu *v);
> >  #define nestedhvm_vcpu_exit_guestmode(v)  \
> >      vcpu_nestedhvm(v).nv_guestmode = 0
> >  
> > -/* Nested paging */
> > -#define NESTEDHVM_PAGEFAULT_DONE       0
> > -#define NESTEDHVM_PAGEFAULT_INJECT     1
> > -#define NESTEDHVM_PAGEFAULT_L1_ERROR   2
> > -#define NESTEDHVM_PAGEFAULT_L0_ERROR   3
> > -#define NESTEDHVM_PAGEFAULT_MMIO       4
> > -#define NESTEDHVM_PAGEFAULT_RETRY      5
> > -#define NESTEDHVM_PAGEFAULT_DIRECT_MMIO 6
> >  int nestedhvm_hap_nested_page_fault(struct vcpu *v, paddr_t *L2_gpa,
> >                                      struct npfec npfec);
> >  
> > @@ -59,6 +63,25 @@ unsigned long *nestedhvm_vcpu_iomap_get(bool ioport_80, bool ioport_ed);
> >  
> >  void nestedhvm_vmcx_flushtlb(struct p2m_domain *p2m);
> >  
> > +#else /* !CONFIG_NESTED_VIRT */
> > +
> > +static inline int nestedhvm_vcpu_initialise(struct vcpu *v) { return 0; }
> > +static inline void nestedhvm_vcpu_destroy(struct vcpu *v) { }
> > +static inline void nestedhvm_vcpu_reset(struct vcpu *v) { }
> > +static inline bool nestedhvm_vcpu_in_guestmode(struct vcpu *v) { return false; }
> > +static inline int nestedhvm_hap_nested_page_fault(struct vcpu *v, paddr_t *L2_gpa,
> > +                                                  struct npfec npfec)
> > +{
> > +    return NESTEDHVM_PAGEFAULT_L0_ERROR;
> > +}
> > +#define nestedhvm_vcpu_enter_guestmode(v) do { } while (0)
> > +#define nestedhvm_vcpu_exit_guestmode(v)  do { } while (0)
> > +#define nestedhvm_paging_mode_hap(v) false
> > +#define nestedhvm_vmswitch_in_progress(v) false
> > +static inline void nestedhvm_vmcx_flushtlb(struct p2m_domain *p2m) { }
> > +
> > +#endif /* CONFIG_NESTED_VIRT */
> > +
> >  static inline bool nestedhvm_is_n2(struct vcpu *v)
> >  {
> >      if ( !nestedhvm_enabled(v->domain) ||
> > diff --git a/xen/arch/x86/include/asm/hvm/vmx/vvmx.h b/xen/arch/x86/include/asm/hvm/vmx/vvmx.h
> > index da10d3fa96..8dc876a4c2 100644
> > --- a/xen/arch/x86/include/asm/hvm/vmx/vvmx.h
> > +++ b/xen/arch/x86/include/asm/hvm/vmx/vvmx.h
> > @@ -73,6 +73,8 @@ union vmx_inst_info {
> >      u32 word;
> >  };
> >  
> > +#ifdef CONFIG_NESTED_VIRT
> > +
> >  int cf_check nvmx_vcpu_initialise(struct vcpu *v);
> >  void cf_check nvmx_vcpu_destroy(struct vcpu *v);
> >  int cf_check nvmx_vcpu_reset(struct vcpu *v);
> > @@ -199,5 +201,33 @@ int nept_translate_l2ga(struct vcpu *v, paddr_t l2ga,
> >                          uint64_t *exit_qual, uint32_t *exit_reason);
> >  int nvmx_cpu_up_prepare(unsigned int cpu);
> >  void nvmx_cpu_dead(unsigned int cpu);
> > +
> > +#else /* !CONFIG_NESTED_VIRT */
> > +
> > +static inline void nvmx_update_exec_control(struct vcpu *v, u32 value) { }
> > +static inline void nvmx_update_secondary_exec_control(struct vcpu *v,
> > +                                                      unsigned long value) { }
> > +static inline void nvmx_update_exception_bitmap(struct vcpu *v,
> > +                                                unsigned long value) { }
> > +static inline u64 nvmx_get_tsc_offset(struct vcpu *v) { return 0; }
> > +static inline void nvmx_set_cr_read_shadow(struct vcpu *v, unsigned int cr) { }
> > +static inline bool nvmx_intercepts_exception(struct vcpu *v, unsigned int vector,
> > +                                             int error_code) { return false; }
> > +static inline int nvmx_n2_vmexit_handler(struct cpu_user_regs *regs,
> > +                                         unsigned int exit_reason) { return 0; }
> > +static inline void nvmx_idtv_handling(void) { }
> > +static inline int nvmx_msr_read_intercept(unsigned int msr, u64 *msr_content)
> > +{
> > +    return 0;
> > +}
> > +static inline int nvmx_handle_vmx_insn(struct cpu_user_regs *regs,
> > +                                       unsigned int exit_reason) { return 0; }
> > +static inline int nvmx_cpu_up_prepare(unsigned int cpu) { return 0; }
> > +static inline void nvmx_cpu_dead(unsigned int cpu) { }
> > +
> > +#define get_vvmcs(vcpu, encoding) 0
> > +
> > +#endif /* CONFIG_NESTED_VIRT */
> > +
> >  #endif /* __ASM_X86_HVM_VVMX_H__ */
> >  
> > diff --git a/xen/arch/x86/mm/Makefile b/xen/arch/x86/mm/Makefile
> > index 960f6e8409..aa15811c2e 100644
> > --- a/xen/arch/x86/mm/Makefile
> > +++ b/xen/arch/x86/mm/Makefile
> > @@ -7,7 +7,7 @@ obj-$(CONFIG_SHADOW_PAGING) += guest_walk_4.o
> >  obj-$(CONFIG_VM_EVENT) += mem_access.o
> >  obj-$(CONFIG_MEM_PAGING) += mem_paging.o
> >  obj-$(CONFIG_MEM_SHARING) += mem_sharing.o
> > -obj-$(CONFIG_HVM) += nested.o
> > +obj-$(CONFIG_NESTED_VIRT) += nested.o
> >  obj-$(CONFIG_HVM) += p2m.o
> >  obj-y += p2m-basic.o
> >  obj-$(CONFIG_INTEL_VMX) += p2m-ept.o
> > diff --git a/xen/arch/x86/mm/hap/Makefile b/xen/arch/x86/mm/hap/Makefile
> > index 67c29b2162..de1bb3abde 100644
> > --- a/xen/arch/x86/mm/hap/Makefile
> > +++ b/xen/arch/x86/mm/hap/Makefile
> > @@ -2,5 +2,5 @@ obj-y += hap.o
> >  obj-y += guest_walk_2.o
> >  obj-y += guest_walk_3.o
> >  obj-y += guest_walk_4.o
> > -obj-y += nested_hap.o
> > -obj-$(CONFIG_INTEL_VMX) += nested_ept.o
> > +obj-$(CONFIG_NESTED_VIRT) += nested_hap.o
> > +obj-$(CONFIG_NESTED_VIRT) += nested_ept.o
> 
> With this change nested_ept.o is no longer gated explicitly on
> CONFIG_INTEL_VMX, which could cause build issues if you have a Kconfig
> like:
> 
> CONFIG_INTEL_VMX=n
> CONFIG_AMD_SVM=y
> CONFIG_NESTED_VIRT=y
> 
> Does the code in nested_ept.o have dependencies on other files gated
> by CONFIG_INTEL_VMX, and hence would fail at the linking stage?  And
> even if it builds, the code in nested_ept.o would be unreachable I
> expect.

It does build, but you are right. I'll improve this.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH] x86/hvm: Add Kconfig option to disable nested virtualization
@ 2026-02-13 22:02 Stefano Stabellini
  2026-02-13 23:48 ` Demi Marie Obenour
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Stefano Stabellini @ 2026-02-13 22:02 UTC (permalink / raw)
  To: xen-devel
  Cc: roger.pau, jbeulich, andrew.cooper3, jason.andryuk,
	alejandro.garciavallejo, stefano.stabellini

Introduce CONFIG_NESTED_VIRT (default n) to allow nested virtualization
support to be disabled at build time. This is useful for embedded or
safety-focused deployments where nested virtualization is not needed,
reducing code size and attack surface.

When CONFIG_NESTED_VIRT=n, the following source files are excluded:
- arch/x86/hvm/nestedhvm.c
- arch/x86/hvm/svm/nestedsvm.c
- arch/x86/hvm/vmx/vvmx.c
- arch/x86/mm/nested.c
- arch/x86/mm/hap/nested_hap.c
- arch/x86/mm/hap/nested_ept.c

Add inline stubs where needed in headers. Guard assembly code paths
for nested virt with #ifdef CONFIG_NESTED_VIRT. Move exception
injection for VMX/SVM instructions to the callers in vmx.c/svm.c to
avoid header dependency issues in the stubs.

No functional change when CONFIG_NESTED_VIRT=y.

Signed-off-by: Stefano Stabellini <stefano.stabellini@amd.com>

---
Changes in v3:
- Kconfig: Change "depends on AMD_SVM || INTEL_VMX" to "depends on HVM"
- Kconfig: Remove redundant "default n" line
- Kconfig: Remove "If unsure, say N." from help text
- mm/hap/Makefile: Simplify using intermediate nested-y variable:
    nested-y := nested_hap.o
    nested-$(CONFIG_INTEL_VMX) += nested_ept.o
    obj-$(CONFIG_NESTED_VIRT) += $(nested-y)
- svm/nestedhvm.h: Remove #ifdef CONFIG_NESTED_VIRT stubs, keep only
  function declarations (the functions are only called from code that
  is already compiled out when nested virt is disabled)
- svm/nestedhvm.h: Add CONFIG_NESTED_VIRT guard to nsvm_efer_svm_enabled
  macro to return false when nested virt is disabled
- svm/svm.c: Move #UD injection for STGI/CLGI to the caller instead of
  stub functions, checking nestedhvm_enabled()/nsvm_efer_svm_enabled()
- svm/svm.c: Mark svm_vmexit_do_vmrun/vmload/vmsave as __maybe_unused
- svm/svm.c: Remove empty nsvm_vcpu_switch stub (now guarded in asm)
- svm/entry.S: Add #ifdef CONFIG_NESTED_VIRT guards around nested virt
  specific code paths
- vmx/vmx.c: Remove empty nvmx_switch_guest stub (now guarded in asm)
- vmx/vmx.c: Move nvmx_enqueue_n2_exceptions and nvmx_vmexit_event to
  vvmx.c where they belong
- vmx/vvmx.h: Add declarations for nvmx_vmexit_event and
  nvmx_enqueue_n2_exceptions
- vmx/vvmx.h: Fix nvmx_msr_read_intercept stub comment
- vmx/vvmx.h: nvmx_handle_vmx_insn stub returns X86EMUL_EXCEPTION with
  ASSERT_UNREACHABLE (caller handles injection)
- vmx/vvmx.h: Convert get_vvmcs macro to inline function in stubs
- vmx/entry.S: Add #ifdef CONFIG_NESTED_VIRT guard around nvmx_switch_guest
- nestedhvm.h: Convert macro stubs to proper inline functions
---
 xen/arch/x86/hvm/Kconfig                 |  7 +++
 xen/arch/x86/hvm/Makefile                |  2 +-
 xen/arch/x86/hvm/svm/Makefile            |  2 +-
 xen/arch/x86/hvm/svm/entry.S             |  4 ++
 xen/arch/x86/hvm/svm/nestedhvm.h         |  2 +-
 xen/arch/x86/hvm/svm/svm.c               | 18 ++++--
 xen/arch/x86/hvm/vmx/Makefile            |  2 +-
 xen/arch/x86/hvm/vmx/entry.S             |  2 +
 xen/arch/x86/hvm/vmx/vmx.c               | 31 +---------
 xen/arch/x86/hvm/vmx/vvmx.c              | 26 +++++++++
 xen/arch/x86/include/asm/hvm/hvm.h       |  2 +-
 xen/arch/x86/include/asm/hvm/nestedhvm.h | 64 +++++++++++++++++---
 xen/arch/x86/include/asm/hvm/vmx/vvmx.h  | 74 ++++++++++++++++++++++++
 xen/arch/x86/mm/Makefile                 |  2 +-
 xen/arch/x86/mm/hap/Makefile             |  5 +-
 xen/arch/x86/mm/p2m.h                    |  6 ++
 xen/arch/x86/sysctl.c                    |  2 +
 xen/include/public/sysctl.h              |  4 +-
 18 files changed, 204 insertions(+), 51 deletions(-)

diff --git a/xen/arch/x86/hvm/Kconfig b/xen/arch/x86/hvm/Kconfig
index f32bf5cbb7..af661385b5 100644
--- a/xen/arch/x86/hvm/Kconfig
+++ b/xen/arch/x86/hvm/Kconfig
@@ -92,4 +92,11 @@ config MEM_SHARING
 	bool "Xen memory sharing support (UNSUPPORTED)" if UNSUPPORTED
 	depends on INTEL_VMX
 
+config NESTED_VIRT
+	bool "Nested virtualization support"
+	depends on HVM
+	help
+	  Enable nested virtualization, allowing guests to run their own
+	  hypervisors. This requires hardware support.
+
 endif
diff --git a/xen/arch/x86/hvm/Makefile b/xen/arch/x86/hvm/Makefile
index f34fb03934..b8a0a68624 100644
--- a/xen/arch/x86/hvm/Makefile
+++ b/xen/arch/x86/hvm/Makefile
@@ -18,7 +18,7 @@ obj-y += irq.o
 obj-y += mmio.o
 obj-$(CONFIG_VM_EVENT) += monitor.o
 obj-y += mtrr.o
-obj-y += nestedhvm.o
+obj-$(CONFIG_NESTED_VIRT) += nestedhvm.o
 obj-y += pmtimer.o
 obj-y += quirks.o
 obj-y += rtc.o
diff --git a/xen/arch/x86/hvm/svm/Makefile b/xen/arch/x86/hvm/svm/Makefile
index 8a072cafd5..92418e3444 100644
--- a/xen/arch/x86/hvm/svm/Makefile
+++ b/xen/arch/x86/hvm/svm/Makefile
@@ -2,6 +2,6 @@ obj-y += asid.o
 obj-y += emulate.o
 obj-bin-y += entry.o
 obj-y += intr.o
-obj-y += nestedsvm.o
+obj-$(CONFIG_NESTED_VIRT) += nestedsvm.o
 obj-y += svm.o
 obj-y += vmcb.o
diff --git a/xen/arch/x86/hvm/svm/entry.S b/xen/arch/x86/hvm/svm/entry.S
index af8db23b03..7964c80750 100644
--- a/xen/arch/x86/hvm/svm/entry.S
+++ b/xen/arch/x86/hvm/svm/entry.S
@@ -28,7 +28,9 @@ FUNC(svm_asm_do_resume)
         GET_CURRENT(bx)
 .Lsvm_do_resume:
         call svm_intr_assist
+#ifdef CONFIG_NESTED_VIRT
         call nsvm_vcpu_switch
+#endif
         ASSERT_NOT_IN_ATOMIC
 
         mov  VCPU_processor(%rbx),%eax
@@ -39,6 +41,7 @@ FUNC(svm_asm_do_resume)
         cmp  %ecx,(%rdx,%rax,1)
         jne  .Lsvm_process_softirqs
 
+#ifdef CONFIG_NESTED_VIRT
         cmp  %cl,VCPU_nsvm_hap_enabled(%rbx)
 UNLIKELY_START(ne, nsvm_hap)
         cmp  %rcx,VCPU_nhvm_p2m(%rbx)
@@ -52,6 +55,7 @@ UNLIKELY_START(ne, nsvm_hap)
         sti
         jmp  .Lsvm_do_resume
 __UNLIKELY_END(nsvm_hap)
+#endif
 
         call svm_vmenter_helper
 
diff --git a/xen/arch/x86/hvm/svm/nestedhvm.h b/xen/arch/x86/hvm/svm/nestedhvm.h
index 9bfed5ffd7..5cb85410f8 100644
--- a/xen/arch/x86/hvm/svm/nestedhvm.h
+++ b/xen/arch/x86/hvm/svm/nestedhvm.h
@@ -24,7 +24,7 @@
 
 /* True when l1 guest enabled SVM in EFER */
 #define nsvm_efer_svm_enabled(v) \
-    (!!((v)->arch.hvm.guest_efer & EFER_SVME))
+    (IS_ENABLED(CONFIG_NESTED_VIRT) && ((v)->arch.hvm.guest_efer & EFER_SVME))
 
 int nestedsvm_vmcb_map(struct vcpu *v, uint64_t vmcbaddr);
 void nestedsvm_vmexit_defer(struct vcpu *v,
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index 18ba837738..2cabc89fb5 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -2165,7 +2165,7 @@ static void svm_vmexit_do_pause(struct cpu_user_regs *regs)
     vcpu_yield();
 }
 
-static void
+static void __maybe_unused
 svm_vmexit_do_vmrun(struct cpu_user_regs *regs,
                     struct vcpu *v, uint64_t vmcbaddr)
 {
@@ -2211,7 +2211,7 @@ nsvm_get_nvmcb_page(struct vcpu *v, uint64_t vmcbaddr)
     return  page;
 }
 
-static void
+static void __maybe_unused
 svm_vmexit_do_vmload(struct vmcb_struct *vmcb,
                      struct cpu_user_regs *regs,
                      struct vcpu *v, uint64_t vmcbaddr)
@@ -2246,7 +2246,7 @@ svm_vmexit_do_vmload(struct vmcb_struct *vmcb,
     __update_guest_eip(regs, inst_len);
 }
 
-static void
+static void __maybe_unused
 svm_vmexit_do_vmsave(struct vmcb_struct *vmcb,
                      struct cpu_user_regs *regs,
                      struct vcpu *v, uint64_t vmcbaddr)
@@ -2465,6 +2465,7 @@ static struct hvm_function_table __initdata_cf_clobber svm_function_table = {
     .set_rdtsc_exiting    = svm_set_rdtsc_exiting,
     .get_insn_bytes       = svm_get_insn_bytes,
 
+#ifdef CONFIG_NESTED_VIRT
     .nhvm_vcpu_initialise = nsvm_vcpu_initialise,
     .nhvm_vcpu_destroy = nsvm_vcpu_destroy,
     .nhvm_vcpu_reset = nsvm_vcpu_reset,
@@ -2474,6 +2475,7 @@ static struct hvm_function_table __initdata_cf_clobber svm_function_table = {
     .nhvm_vmcx_hap_enabled = nsvm_vmcb_hap_enabled,
     .nhvm_intr_blocked = nsvm_intr_blocked,
     .nhvm_hap_walk_L1_p2m = nsvm_hap_walk_L1_p2m,
+#endif
 
     .get_reg = svm_get_reg,
     .set_reg = svm_set_reg,
@@ -3011,10 +3013,16 @@ void asmlinkage svm_vmexit_handler(void)
         svm_vmexit_do_vmsave(vmcb, regs, v, regs->rax);
         break;
     case VMEXIT_STGI:
-        svm_vmexit_do_stgi(regs, v);
+        if ( !nestedhvm_enabled(v->domain) )
+            hvm_inject_hw_exception(X86_EXC_UD, X86_EVENT_NO_EC);
+        else
+            svm_vmexit_do_stgi(regs, v);
         break;
     case VMEXIT_CLGI:
-        svm_vmexit_do_clgi(regs, v);
+        if ( !nsvm_efer_svm_enabled(v) )
+            hvm_inject_hw_exception(X86_EXC_UD, X86_EVENT_NO_EC);
+        else
+            svm_vmexit_do_clgi(regs, v);
         break;
 
     case VMEXIT_XSETBV:
diff --git a/xen/arch/x86/hvm/vmx/Makefile b/xen/arch/x86/hvm/vmx/Makefile
index 04a29ce59d..902564b3e2 100644
--- a/xen/arch/x86/hvm/vmx/Makefile
+++ b/xen/arch/x86/hvm/vmx/Makefile
@@ -3,4 +3,4 @@ obj-y += intr.o
 obj-y += realmode.o
 obj-y += vmcs.o
 obj-y += vmx.o
-obj-y += vvmx.o
+obj-$(CONFIG_NESTED_VIRT) += vvmx.o
diff --git a/xen/arch/x86/hvm/vmx/entry.S b/xen/arch/x86/hvm/vmx/entry.S
index 2bfee715b3..4d62efddf4 100644
--- a/xen/arch/x86/hvm/vmx/entry.S
+++ b/xen/arch/x86/hvm/vmx/entry.S
@@ -83,7 +83,9 @@ FUNC(vmx_asm_vmexit_handler)
 
 .Lvmx_do_vmentry:
         call vmx_intr_assist
+#ifdef CONFIG_NESTED_VIRT
         call nvmx_switch_guest
+#endif
         ASSERT_NOT_IN_ATOMIC
 
         mov  VCPU_processor(%rbx),%eax
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 82c55f49ae..4e3c8018d2 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -2014,33 +2014,6 @@ static void cf_check vmx_update_guest_efer(struct vcpu *v)
         vmx_set_msr_intercept(v, MSR_EFER, VMX_MSR_R);
 }
 
-static void nvmx_enqueue_n2_exceptions(struct vcpu *v,
-            unsigned long intr_fields, int error_code, uint8_t source)
-{
-    struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
-
-    if ( !(nvmx->intr.intr_info & INTR_INFO_VALID_MASK) ) {
-        /* enqueue the exception till the VMCS switch back to L1 */
-        nvmx->intr.intr_info = intr_fields;
-        nvmx->intr.error_code = error_code;
-        nvmx->intr.source = source;
-        vcpu_nestedhvm(v).nv_vmexit_pending = 1;
-        return;
-    }
-    else
-        gdprintk(XENLOG_ERR, "Double Fault on Nested Guest: exception %lx %x"
-                 "on %lx %x\n", intr_fields, error_code,
-                 nvmx->intr.intr_info, nvmx->intr.error_code);
-}
-
-static int cf_check nvmx_vmexit_event(
-    struct vcpu *v, const struct x86_event *event)
-{
-    nvmx_enqueue_n2_exceptions(v, event->vector, event->error_code,
-                               hvm_intsrc_none);
-    return NESTEDHVM_VMEXIT_DONE;
-}
-
 static void __vmx_inject_exception(int trap, int type, int error_code)
 {
     unsigned long intr_fields;
@@ -2933,6 +2906,7 @@ static struct hvm_function_table __initdata_cf_clobber vmx_function_table = {
     .handle_cd            = vmx_handle_cd,
     .set_info_guest       = vmx_set_info_guest,
     .set_rdtsc_exiting    = vmx_set_rdtsc_exiting,
+#ifdef CONFIG_NESTED_VIRT
     .nhvm_vcpu_initialise = nvmx_vcpu_initialise,
     .nhvm_vcpu_destroy    = nvmx_vcpu_destroy,
     .nhvm_vcpu_reset      = nvmx_vcpu_reset,
@@ -2942,8 +2916,9 @@ static struct hvm_function_table __initdata_cf_clobber vmx_function_table = {
     .nhvm_vcpu_vmexit_event = nvmx_vmexit_event,
     .nhvm_intr_blocked    = nvmx_intr_blocked,
     .nhvm_domain_relinquish_resources = nvmx_domain_relinquish_resources,
-    .update_vlapic_mode = vmx_vlapic_msr_changed,
     .nhvm_hap_walk_L1_p2m = nvmx_hap_walk_L1_p2m,
+#endif
+    .update_vlapic_mode = vmx_vlapic_msr_changed,
 #ifdef CONFIG_VM_EVENT
     .enable_msr_interception = vmx_enable_msr_interception,
 #endif
diff --git a/xen/arch/x86/hvm/vmx/vvmx.c b/xen/arch/x86/hvm/vmx/vvmx.c
index 38952f0696..2bb42678c5 100644
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -2821,6 +2821,32 @@ void nvmx_set_cr_read_shadow(struct vcpu *v, unsigned int cr)
     __vmwrite(read_shadow_field, v->arch.hvm.nvcpu.guest_cr[cr]);
 }
 
+void nvmx_enqueue_n2_exceptions(struct vcpu *v,
+            unsigned long intr_fields, int error_code, uint8_t source)
+{
+    struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
+
+    if ( !(nvmx->intr.intr_info & INTR_INFO_VALID_MASK) ) {
+        /* enqueue the exception till the VMCS switch back to L1 */
+        nvmx->intr.intr_info = intr_fields;
+        nvmx->intr.error_code = error_code;
+        nvmx->intr.source = source;
+        vcpu_nestedhvm(v).nv_vmexit_pending = 1;
+        return;
+    }
+    else
+        gdprintk(XENLOG_ERR, "Double Fault on Nested Guest: exception %lx %x"
+                 "on %lx %x\n", intr_fields, error_code,
+                 nvmx->intr.intr_info, nvmx->intr.error_code);
+}
+
+int cf_check nvmx_vmexit_event(struct vcpu *v, const struct x86_event *event)
+{
+    nvmx_enqueue_n2_exceptions(v, event->vector, event->error_code,
+                               hvm_intsrc_none);
+    return NESTEDHVM_VMEXIT_DONE;
+}
+
 void __init start_nested_vmx(struct hvm_function_table *hvm_function_table)
 {
     /* TODO: Require hardware support before enabling nested virt */
diff --git a/xen/arch/x86/include/asm/hvm/hvm.h b/xen/arch/x86/include/asm/hvm/hvm.h
index 7d9774df59..536a38b450 100644
--- a/xen/arch/x86/include/asm/hvm/hvm.h
+++ b/xen/arch/x86/include/asm/hvm/hvm.h
@@ -711,7 +711,7 @@ static inline bool hvm_altp2m_supported(void)
 /* Returns true if we have the minimum hardware requirements for nested virt */
 static inline bool hvm_nested_virt_supported(void)
 {
-    return hvm_funcs.caps.nested_virt;
+    return IS_ENABLED(CONFIG_NESTED_VIRT) && hvm_funcs.caps.nested_virt;
 }
 
 #ifdef CONFIG_ALTP2M
diff --git a/xen/arch/x86/include/asm/hvm/nestedhvm.h b/xen/arch/x86/include/asm/hvm/nestedhvm.h
index ea2c1bc328..2f8209271a 100644
--- a/xen/arch/x86/include/asm/hvm/nestedhvm.h
+++ b/xen/arch/x86/include/asm/hvm/nestedhvm.h
@@ -25,9 +25,21 @@ enum nestedhvm_vmexits {
 /* Nested HVM on/off per domain */
 static inline bool nestedhvm_enabled(const struct domain *d)
 {
-    return IS_ENABLED(CONFIG_HVM) && (d->options & XEN_DOMCTL_CDF_nested_virt);
+    return IS_ENABLED(CONFIG_NESTED_VIRT) &&
+           (d->options & XEN_DOMCTL_CDF_nested_virt);
 }
 
+/* Nested paging */
+#define NESTEDHVM_PAGEFAULT_DONE       0
+#define NESTEDHVM_PAGEFAULT_INJECT     1
+#define NESTEDHVM_PAGEFAULT_L1_ERROR   2
+#define NESTEDHVM_PAGEFAULT_L0_ERROR   3
+#define NESTEDHVM_PAGEFAULT_MMIO       4
+#define NESTEDHVM_PAGEFAULT_RETRY      5
+#define NESTEDHVM_PAGEFAULT_DIRECT_MMIO 6
+
+#ifdef CONFIG_NESTED_VIRT
+
 /* Nested VCPU */
 int nestedhvm_vcpu_initialise(struct vcpu *v);
 void nestedhvm_vcpu_destroy(struct vcpu *v);
@@ -38,14 +50,6 @@ bool nestedhvm_vcpu_in_guestmode(struct vcpu *v);
 #define nestedhvm_vcpu_exit_guestmode(v)  \
     vcpu_nestedhvm(v).nv_guestmode = 0
 
-/* Nested paging */
-#define NESTEDHVM_PAGEFAULT_DONE       0
-#define NESTEDHVM_PAGEFAULT_INJECT     1
-#define NESTEDHVM_PAGEFAULT_L1_ERROR   2
-#define NESTEDHVM_PAGEFAULT_L0_ERROR   3
-#define NESTEDHVM_PAGEFAULT_MMIO       4
-#define NESTEDHVM_PAGEFAULT_RETRY      5
-#define NESTEDHVM_PAGEFAULT_DIRECT_MMIO 6
 int nestedhvm_hap_nested_page_fault(struct vcpu *v, paddr_t *L2_gpa,
                                     struct npfec npfec);
 
@@ -59,6 +63,48 @@ unsigned long *nestedhvm_vcpu_iomap_get(bool ioport_80, bool ioport_ed);
 
 void nestedhvm_vmcx_flushtlb(struct p2m_domain *p2m);
 
+#else /* !CONFIG_NESTED_VIRT */
+
+static inline int nestedhvm_vcpu_initialise(struct vcpu *v)
+{
+    ASSERT_UNREACHABLE();
+    return -EOPNOTSUPP;
+}
+static inline void nestedhvm_vcpu_destroy(struct vcpu *v) { }
+static inline void nestedhvm_vcpu_reset(struct vcpu *v)
+{
+    ASSERT_UNREACHABLE();
+}
+static inline bool nestedhvm_vcpu_in_guestmode(struct vcpu *v) { return false; }
+static inline int nestedhvm_hap_nested_page_fault(struct vcpu *v, paddr_t *L2_gpa,
+                                                  struct npfec npfec)
+{
+    ASSERT_UNREACHABLE();
+    return NESTEDHVM_PAGEFAULT_L0_ERROR;
+}
+static inline void nestedhvm_vcpu_enter_guestmode(struct vcpu *v)
+{
+    ASSERT_UNREACHABLE();
+}
+static inline void nestedhvm_vcpu_exit_guestmode(struct vcpu *v)
+{
+    ASSERT_UNREACHABLE();
+}
+static inline bool nestedhvm_paging_mode_hap(struct vcpu *v)
+{
+    return false;
+}
+static inline bool nestedhvm_vmswitch_in_progress(struct vcpu *v)
+{
+    return false;
+}
+static inline void nestedhvm_vmcx_flushtlb(struct p2m_domain *p2m)
+{
+    ASSERT_UNREACHABLE();
+}
+
+#endif /* CONFIG_NESTED_VIRT */
+
 static inline bool nestedhvm_is_n2(struct vcpu *v)
 {
     if ( !nestedhvm_enabled(v->domain) ||
diff --git a/xen/arch/x86/include/asm/hvm/vmx/vvmx.h b/xen/arch/x86/include/asm/hvm/vmx/vvmx.h
index da10d3fa96..d0c1ae29f6 100644
--- a/xen/arch/x86/include/asm/hvm/vmx/vvmx.h
+++ b/xen/arch/x86/include/asm/hvm/vmx/vvmx.h
@@ -73,6 +73,8 @@ union vmx_inst_info {
     u32 word;
 };
 
+#ifdef CONFIG_NESTED_VIRT
+
 int cf_check nvmx_vcpu_initialise(struct vcpu *v);
 void cf_check nvmx_vcpu_destroy(struct vcpu *v);
 int cf_check nvmx_vcpu_reset(struct vcpu *v);
@@ -199,5 +201,77 @@ int nept_translate_l2ga(struct vcpu *v, paddr_t l2ga,
                         uint64_t *exit_qual, uint32_t *exit_reason);
 int nvmx_cpu_up_prepare(unsigned int cpu);
 void nvmx_cpu_dead(unsigned int cpu);
+int cf_check nvmx_vmexit_event(struct vcpu *v, const struct x86_event *event);
+void nvmx_enqueue_n2_exceptions(struct vcpu *v,
+            unsigned long intr_fields, int error_code, uint8_t source);
+
+#else /* !CONFIG_NESTED_VIRT */
+
+static inline void nvmx_update_exec_control(struct vcpu *v, u32 value)
+{
+    ASSERT_UNREACHABLE();
+}
+static inline void nvmx_update_secondary_exec_control(struct vcpu *v,
+                                                      unsigned long value)
+{
+    ASSERT_UNREACHABLE();
+}
+static inline void nvmx_update_exception_bitmap(struct vcpu *v,
+                                                unsigned long value)
+{
+    ASSERT_UNREACHABLE();
+}
+static inline u64 nvmx_get_tsc_offset(struct vcpu *v)
+{
+    ASSERT_UNREACHABLE();
+    return 0;
+}
+static inline void nvmx_set_cr_read_shadow(struct vcpu *v, unsigned int cr)
+{
+    ASSERT_UNREACHABLE();
+}
+static inline bool nvmx_intercepts_exception(struct vcpu *v, unsigned int vector,
+                                             int error_code)
+{
+    ASSERT_UNREACHABLE();
+    return false;
+}
+static inline int nvmx_n2_vmexit_handler(struct cpu_user_regs *regs,
+                                         unsigned int exit_reason)
+{
+    ASSERT_UNREACHABLE();
+    return 0;
+}
+static inline void nvmx_idtv_handling(void)
+{
+    ASSERT_UNREACHABLE();
+}
+static inline int nvmx_msr_read_intercept(unsigned int msr, u64 *msr_content)
+{
+    /* return 0 to trigger #GP */
+    return 0;
+}
+static inline int nvmx_handle_vmx_insn(struct cpu_user_regs *regs,
+                                       unsigned int exit_reason)
+{
+    ASSERT_UNREACHABLE();
+    return X86EMUL_EXCEPTION;
+}
+static inline int nvmx_cpu_up_prepare(unsigned int cpu) { return 0; }
+static inline void nvmx_cpu_dead(unsigned int cpu) { }
+static inline void nvmx_enqueue_n2_exceptions(struct vcpu *v,
+            unsigned long intr_fields, int error_code, uint8_t source)
+{
+    ASSERT_UNREACHABLE();
+}
+
+static inline u64 get_vvmcs(const struct vcpu *vcpu, u32 encoding)
+{
+    ASSERT_UNREACHABLE();
+    return 0;
+}
+
+#endif /* CONFIG_NESTED_VIRT */
+
 #endif /* __ASM_X86_HVM_VVMX_H__ */
 
diff --git a/xen/arch/x86/mm/Makefile b/xen/arch/x86/mm/Makefile
index 960f6e8409..aa15811c2e 100644
--- a/xen/arch/x86/mm/Makefile
+++ b/xen/arch/x86/mm/Makefile
@@ -7,7 +7,7 @@ obj-$(CONFIG_SHADOW_PAGING) += guest_walk_4.o
 obj-$(CONFIG_VM_EVENT) += mem_access.o
 obj-$(CONFIG_MEM_PAGING) += mem_paging.o
 obj-$(CONFIG_MEM_SHARING) += mem_sharing.o
-obj-$(CONFIG_HVM) += nested.o
+obj-$(CONFIG_NESTED_VIRT) += nested.o
 obj-$(CONFIG_HVM) += p2m.o
 obj-y += p2m-basic.o
 obj-$(CONFIG_INTEL_VMX) += p2m-ept.o
diff --git a/xen/arch/x86/mm/hap/Makefile b/xen/arch/x86/mm/hap/Makefile
index 67c29b2162..efdc91ea82 100644
--- a/xen/arch/x86/mm/hap/Makefile
+++ b/xen/arch/x86/mm/hap/Makefile
@@ -2,5 +2,6 @@ obj-y += hap.o
 obj-y += guest_walk_2.o
 obj-y += guest_walk_3.o
 obj-y += guest_walk_4.o
-obj-y += nested_hap.o
-obj-$(CONFIG_INTEL_VMX) += nested_ept.o
+nested-y := nested_hap.o
+nested-$(CONFIG_INTEL_VMX) += nested_ept.o
+obj-$(CONFIG_NESTED_VIRT) += $(nested-y)
diff --git a/xen/arch/x86/mm/p2m.h b/xen/arch/x86/mm/p2m.h
index 635f5a7f45..63808dddcc 100644
--- a/xen/arch/x86/mm/p2m.h
+++ b/xen/arch/x86/mm/p2m.h
@@ -25,9 +25,15 @@ void p2m_teardown_altp2m(struct domain *d);
 void p2m_flush_table_locked(struct p2m_domain *p2m);
 int __must_check p2m_remove_entry(struct p2m_domain *p2m, gfn_t gfn, mfn_t mfn,
                                   unsigned int page_order);
+#ifdef CONFIG_NESTED_VIRT
 void p2m_nestedp2m_init(struct p2m_domain *p2m);
 int p2m_init_nestedp2m(struct domain *d);
 void p2m_teardown_nestedp2m(struct domain *d);
+#else
+static inline void p2m_nestedp2m_init(struct p2m_domain *p2m) { }
+static inline int p2m_init_nestedp2m(struct domain *d) { return 0; }
+static inline void p2m_teardown_nestedp2m(struct domain *d) { }
+#endif
 
 int ept_p2m_init(struct p2m_domain *p2m);
 void ept_p2m_uninit(struct p2m_domain *p2m);
diff --git a/xen/arch/x86/sysctl.c b/xen/arch/x86/sysctl.c
index 1b04947516..b1d865e1c8 100644
--- a/xen/arch/x86/sysctl.c
+++ b/xen/arch/x86/sysctl.c
@@ -103,6 +103,8 @@ void arch_do_physinfo(struct xen_sysctl_physinfo *pi)
         pi->capabilities |= XEN_SYSCTL_PHYSCAP_hap;
     if ( IS_ENABLED(CONFIG_SHADOW_PAGING) )
         pi->capabilities |= XEN_SYSCTL_PHYSCAP_shadow;
+    if ( hvm_nested_virt_supported() )
+        pi->capabilities |= XEN_SYSCTL_PHYSCAP_nestedhvm;
 }
 
 long arch_do_sysctl(
diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h
index 66c9b65465..b4bd1dd7b2 100644
--- a/xen/include/public/sysctl.h
+++ b/xen/include/public/sysctl.h
@@ -100,9 +100,11 @@ struct xen_sysctl_tbuf_op {
 /* Xen supports the Grant v1 and/or v2 ABIs. */
 #define XEN_SYSCTL_PHYSCAP_gnttab_v1     (1u << 8)
 #define XEN_SYSCTL_PHYSCAP_gnttab_v2     (1u << 9)
+/* The platform supports nested HVM. */
+#define XEN_SYSCTL_PHYSCAP_nestedhvm     (1u << 10)
 
 /* Max XEN_SYSCTL_PHYSCAP_* constant.  Used for ABI checking. */
-#define XEN_SYSCTL_PHYSCAP_MAX XEN_SYSCTL_PHYSCAP_gnttab_v2
+#define XEN_SYSCTL_PHYSCAP_MAX XEN_SYSCTL_PHYSCAP_nestedhvm
 
 #if defined(__arm__) || defined(__aarch64__)
 #define XEN_SYSCTL_PHYSCAP_ARM_SVE_MASK  (0x1FU)
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH] x86/hvm: Add Kconfig option to disable nested virtualization
  2026-02-13 22:02 Stefano Stabellini
@ 2026-02-13 23:48 ` Demi Marie Obenour
  2026-02-16 10:00 ` Jan Beulich
  2026-02-16 10:34 ` Alejandro Vallejo
  2 siblings, 0 replies; 10+ messages in thread
From: Demi Marie Obenour @ 2026-02-13 23:48 UTC (permalink / raw)
  To: Stefano Stabellini, xen-devel
  Cc: roger.pau, jbeulich, andrew.cooper3, jason.andryuk,
	alejandro.garciavallejo


[-- Attachment #1.1.1: Type: text/plain, Size: 4558 bytes --]

On 2/13/26 17:02, Stefano Stabellini wrote:
> Introduce CONFIG_NESTED_VIRT (default n) to allow nested virtualization
> support to be disabled at build time. This is useful for embedded or
> safety-focused deployments where nested virtualization is not needed,
> reducing code size and attack surface.
> 
> When CONFIG_NESTED_VIRT=n, the following source files are excluded:
> - arch/x86/hvm/nestedhvm.c
> - arch/x86/hvm/svm/nestedsvm.c
> - arch/x86/hvm/vmx/vvmx.c
> - arch/x86/mm/nested.c
> - arch/x86/mm/hap/nested_hap.c
> - arch/x86/mm/hap/nested_ept.c
> 
> Add inline stubs where needed in headers. Guard assembly code paths
> for nested virt with #ifdef CONFIG_NESTED_VIRT. Move exception
> injection for VMX/SVM instructions to the callers in vmx.c/svm.c to
> avoid header dependency issues in the stubs.
> 
> No functional change when CONFIG_NESTED_VIRT=y.
> 
> Signed-off-by: Stefano Stabellini <stefano.stabellini@amd.com>
> 
> ---
> Changes in v3:
> - Kconfig: Change "depends on AMD_SVM || INTEL_VMX" to "depends on HVM"
> - Kconfig: Remove redundant "default n" line
> - Kconfig: Remove "If unsure, say N." from help text
> - mm/hap/Makefile: Simplify using intermediate nested-y variable:
>     nested-y := nested_hap.o
>     nested-$(CONFIG_INTEL_VMX) += nested_ept.o
>     obj-$(CONFIG_NESTED_VIRT) += $(nested-y)
> - svm/nestedhvm.h: Remove #ifdef CONFIG_NESTED_VIRT stubs, keep only
>   function declarations (the functions are only called from code that
>   is already compiled out when nested virt is disabled)
> - svm/nestedhvm.h: Add CONFIG_NESTED_VIRT guard to nsvm_efer_svm_enabled
>   macro to return false when nested virt is disabled
> - svm/svm.c: Move #UD injection for STGI/CLGI to the caller instead of
>   stub functions, checking nestedhvm_enabled()/nsvm_efer_svm_enabled()
> - svm/svm.c: Mark svm_vmexit_do_vmrun/vmload/vmsave as __maybe_unused
> - svm/svm.c: Remove empty nsvm_vcpu_switch stub (now guarded in asm)
> - svm/entry.S: Add #ifdef CONFIG_NESTED_VIRT guards around nested virt
>   specific code paths
> - vmx/vmx.c: Remove empty nvmx_switch_guest stub (now guarded in asm)
> - vmx/vmx.c: Move nvmx_enqueue_n2_exceptions and nvmx_vmexit_event to
>   vvmx.c where they belong
> - vmx/vvmx.h: Add declarations for nvmx_vmexit_event and
>   nvmx_enqueue_n2_exceptions
> - vmx/vvmx.h: Fix nvmx_msr_read_intercept stub comment
> - vmx/vvmx.h: nvmx_handle_vmx_insn stub returns X86EMUL_EXCEPTION with
>   ASSERT_UNREACHABLE (caller handles injection)
> - vmx/vvmx.h: Convert get_vvmcs macro to inline function in stubs
> - vmx/entry.S: Add #ifdef CONFIG_NESTED_VIRT guard around nvmx_switch_guest
> - nestedhvm.h: Convert macro stubs to proper inline functions
> ---
>  xen/arch/x86/hvm/Kconfig                 |  7 +++
>  xen/arch/x86/hvm/Makefile                |  2 +-
>  xen/arch/x86/hvm/svm/Makefile            |  2 +-
>  xen/arch/x86/hvm/svm/entry.S             |  4 ++
>  xen/arch/x86/hvm/svm/nestedhvm.h         |  2 +-
>  xen/arch/x86/hvm/svm/svm.c               | 18 ++++--
>  xen/arch/x86/hvm/vmx/Makefile            |  2 +-
>  xen/arch/x86/hvm/vmx/entry.S             |  2 +
>  xen/arch/x86/hvm/vmx/vmx.c               | 31 +---------
>  xen/arch/x86/hvm/vmx/vvmx.c              | 26 +++++++++
>  xen/arch/x86/include/asm/hvm/hvm.h       |  2 +-
>  xen/arch/x86/include/asm/hvm/nestedhvm.h | 64 +++++++++++++++++---
>  xen/arch/x86/include/asm/hvm/vmx/vvmx.h  | 74 ++++++++++++++++++++++++
>  xen/arch/x86/mm/Makefile                 |  2 +-
>  xen/arch/x86/mm/hap/Makefile             |  5 +-
>  xen/arch/x86/mm/p2m.h                    |  6 ++
>  xen/arch/x86/sysctl.c                    |  2 +
>  xen/include/public/sysctl.h              |  4 +-
>  18 files changed, 204 insertions(+), 51 deletions(-)
> 
> diff --git a/xen/arch/x86/hvm/Kconfig b/xen/arch/x86/hvm/Kconfig
> index f32bf5cbb7..af661385b5 100644
> --- a/xen/arch/x86/hvm/Kconfig
> +++ b/xen/arch/x86/hvm/Kconfig
> @@ -92,4 +92,11 @@ config MEM_SHARING
>  	bool "Xen memory sharing support (UNSUPPORTED)" if UNSUPPORTED
>  	depends on INTEL_VMX
>  
> +config NESTED_VIRT
> +	bool "Nested virtualization support"
> +	depends on HVM
> +	help
> +	  Enable nested virtualization, allowing guests to run their own
> +	  hypervisors. This requires hardware support.

Should this also come with a warning that allowing guests to use
nested virtualization is insecure unless both L1 and L2 guests are
trusted?
-- 
Sincerely,
Demi Marie Obenour (she/her/hers)

[-- Attachment #1.1.2: OpenPGP public key --]
[-- Type: application/pgp-keys, Size: 7253 bytes --]

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] x86/hvm: Add Kconfig option to disable nested virtualization
  2026-02-13 22:02 Stefano Stabellini
  2026-02-13 23:48 ` Demi Marie Obenour
@ 2026-02-16 10:00 ` Jan Beulich
  2026-02-16 10:34 ` Alejandro Vallejo
  2 siblings, 0 replies; 10+ messages in thread
From: Jan Beulich @ 2026-02-16 10:00 UTC (permalink / raw)
  To: Stefano Stabellini
  Cc: roger.pau, andrew.cooper3, jason.andryuk, alejandro.garciavallejo,
	xen-devel

On 13.02.2026 23:02, Stefano Stabellini wrote:
> Introduce CONFIG_NESTED_VIRT (default n) to allow nested virtualization
> support to be disabled at build time. This is useful for embedded or
> safety-focused deployments where nested virtualization is not needed,
> reducing code size and attack surface.
> 
> When CONFIG_NESTED_VIRT=n, the following source files are excluded:
> - arch/x86/hvm/nestedhvm.c
> - arch/x86/hvm/svm/nestedsvm.c
> - arch/x86/hvm/vmx/vvmx.c
> - arch/x86/mm/nested.c
> - arch/x86/mm/hap/nested_hap.c
> - arch/x86/mm/hap/nested_ept.c
> 
> Add inline stubs where needed in headers. Guard assembly code paths
> for nested virt with #ifdef CONFIG_NESTED_VIRT. Move exception
> injection for VMX/SVM instructions to the callers in vmx.c/svm.c to
> avoid header dependency issues in the stubs.
> 
> No functional change when CONFIG_NESTED_VIRT=y.
> 
> Signed-off-by: Stefano Stabellini <stefano.stabellini@amd.com>
> 
> ---
> Changes in v3:
> - Kconfig: Change "depends on AMD_SVM || INTEL_VMX" to "depends on HVM"
> - Kconfig: Remove redundant "default n" line
> - Kconfig: Remove "If unsure, say N." from help text
> - mm/hap/Makefile: Simplify using intermediate nested-y variable:
>     nested-y := nested_hap.o
>     nested-$(CONFIG_INTEL_VMX) += nested_ept.o
>     obj-$(CONFIG_NESTED_VIRT) += $(nested-y)
> - svm/nestedhvm.h: Remove #ifdef CONFIG_NESTED_VIRT stubs, keep only
>   function declarations (the functions are only called from code that
>   is already compiled out when nested virt is disabled)
> - svm/nestedhvm.h: Add CONFIG_NESTED_VIRT guard to nsvm_efer_svm_enabled
>   macro to return false when nested virt is disabled
> - svm/svm.c: Move #UD injection for STGI/CLGI to the caller instead of
>   stub functions, checking nestedhvm_enabled()/nsvm_efer_svm_enabled()
> - svm/svm.c: Mark svm_vmexit_do_vmrun/vmload/vmsave as __maybe_unused
> - svm/svm.c: Remove empty nsvm_vcpu_switch stub (now guarded in asm)
> - svm/entry.S: Add #ifdef CONFIG_NESTED_VIRT guards around nested virt
>   specific code paths
> - vmx/vmx.c: Remove empty nvmx_switch_guest stub (now guarded in asm)
> - vmx/vmx.c: Move nvmx_enqueue_n2_exceptions and nvmx_vmexit_event to
>   vvmx.c where they belong
> - vmx/vvmx.h: Add declarations for nvmx_vmexit_event and
>   nvmx_enqueue_n2_exceptions
> - vmx/vvmx.h: Fix nvmx_msr_read_intercept stub comment
> - vmx/vvmx.h: nvmx_handle_vmx_insn stub returns X86EMUL_EXCEPTION with
>   ASSERT_UNREACHABLE (caller handles injection)
> - vmx/vvmx.h: Convert get_vvmcs macro to inline function in stubs
> - vmx/entry.S: Add #ifdef CONFIG_NESTED_VIRT guard around nvmx_switch_guest
> - nestedhvm.h: Convert macro stubs to proper inline functions

Oh, wow, that's an almost complete re-write?

> --- a/xen/arch/x86/hvm/Kconfig
> +++ b/xen/arch/x86/hvm/Kconfig
> @@ -92,4 +92,11 @@ config MEM_SHARING
>  	bool "Xen memory sharing support (UNSUPPORTED)" if UNSUPPORTED
>  	depends on INTEL_VMX
>  
> +config NESTED_VIRT
> +	bool "Nested virtualization support"
> +	depends on HVM
> +	help
> +	  Enable nested virtualization, allowing guests to run their own
> +	  hypervisors. This requires hardware support.

What's the last sentence about? HVM itself already requires hardware
support, yet that's about it especially for VMX (where only HAP is the
other requirement), isn't it? If this is about those advanced features,
perhaps this would then want to be more specific?

> --- a/xen/arch/x86/hvm/svm/nestedhvm.h
> +++ b/xen/arch/x86/hvm/svm/nestedhvm.h
> @@ -24,7 +24,7 @@
>  
>  /* True when l1 guest enabled SVM in EFER */
>  #define nsvm_efer_svm_enabled(v) \
> -    (!!((v)->arch.hvm.guest_efer & EFER_SVME))
> +    (IS_ENABLED(CONFIG_NESTED_VIRT) && ((v)->arch.hvm.guest_efer & EFER_SVME))

Constructs like these are on the edge: Yes, passing in an expression with a side
effect isn't very likely here. Yet still, this being a widely visible macro, I
wonder if it wouldn't better guarantee v to be evaluated exactly once.

> --- a/xen/arch/x86/hvm/svm/svm.c
> +++ b/xen/arch/x86/hvm/svm/svm.c
> @@ -2165,7 +2165,7 @@ static void svm_vmexit_do_pause(struct cpu_user_regs *regs)
>      vcpu_yield();
>  }
>  
> -static void
> +static void __maybe_unused
>  svm_vmexit_do_vmrun(struct cpu_user_regs *regs,
>                      struct vcpu *v, uint64_t vmcbaddr)
>  {
> @@ -2211,7 +2211,7 @@ nsvm_get_nvmcb_page(struct vcpu *v, uint64_t vmcbaddr)
>      return  page;
>  }
>  
> -static void
> +static void __maybe_unused
>  svm_vmexit_do_vmload(struct vmcb_struct *vmcb,
>                       struct cpu_user_regs *regs,
>                       struct vcpu *v, uint64_t vmcbaddr)
> @@ -2246,7 +2246,7 @@ svm_vmexit_do_vmload(struct vmcb_struct *vmcb,
>      __update_guest_eip(regs, inst_len);
>  }
>  
> -static void
> +static void __maybe_unused
>  svm_vmexit_do_vmsave(struct vmcb_struct *vmcb,
>                       struct cpu_user_regs *regs,
>                       struct vcpu *v, uint64_t vmcbaddr)

Why are these needed? The call sites don't go away afaics.

If these are nevertheless needed, question is whether a suitable single #ifdef
might not be tidier.

> @@ -3011,10 +3013,16 @@ void asmlinkage svm_vmexit_handler(void)
>          svm_vmexit_do_vmsave(vmcb, regs, v, regs->rax);
>          break;
>      case VMEXIT_STGI:
> -        svm_vmexit_do_stgi(regs, v);
> +        if ( !nestedhvm_enabled(v->domain) )
> +            hvm_inject_hw_exception(X86_EXC_UD, X86_EVENT_NO_EC);
> +        else
> +            svm_vmexit_do_stgi(regs, v);
>          break;
>      case VMEXIT_CLGI:
> -        svm_vmexit_do_clgi(regs, v);
> +        if ( !nsvm_efer_svm_enabled(v) )
> +            hvm_inject_hw_exception(X86_EXC_UD, X86_EVENT_NO_EC);
> +        else
> +            svm_vmexit_do_clgi(regs, v);
>          break;

These render respective checks in the functions themselves dead, which in
particular means the bodies of those if()s there are then unreachable (a
Misra violation of a rule we did accept).

> @@ -2942,8 +2916,9 @@ static struct hvm_function_table __initdata_cf_clobber vmx_function_table = {
>      .nhvm_vcpu_vmexit_event = nvmx_vmexit_event,
>      .nhvm_intr_blocked    = nvmx_intr_blocked,
>      .nhvm_domain_relinquish_resources = nvmx_domain_relinquish_resources,
> -    .update_vlapic_mode = vmx_vlapic_msr_changed,

I realize the = wasn't properly padded here, but ...

>      .nhvm_hap_walk_L1_p2m = nvmx_hap_walk_L1_p2m,
> +#endif
> +    .update_vlapic_mode = vmx_vlapic_msr_changed,

... can you please do so while moving the line?

> --- a/xen/arch/x86/hvm/vmx/vvmx.c
> +++ b/xen/arch/x86/hvm/vmx/vvmx.c
> @@ -2821,6 +2821,32 @@ void nvmx_set_cr_read_shadow(struct vcpu *v, unsigned int cr)
>      __vmwrite(read_shadow_field, v->arch.hvm.nvcpu.guest_cr[cr]);
>  }
>  
> +void nvmx_enqueue_n2_exceptions(struct vcpu *v,
> +            unsigned long intr_fields, int error_code, uint8_t source)

While moving, can obvious style issues please be addressed? Bad indentation
here, ...

> +{
> +    struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
> +
> +    if ( !(nvmx->intr.intr_info & INTR_INFO_VALID_MASK) ) {

... misplaced brace here, and ...

> +        /* enqueue the exception till the VMCS switch back to L1 */

... malformed comment here.

> --- a/xen/arch/x86/include/asm/hvm/hvm.h
> +++ b/xen/arch/x86/include/asm/hvm/hvm.h
> @@ -711,7 +711,7 @@ static inline bool hvm_altp2m_supported(void)
>  /* Returns true if we have the minimum hardware requirements for nested virt */
>  static inline bool hvm_nested_virt_supported(void)
>  {
> -    return hvm_funcs.caps.nested_virt;
> +    return IS_ENABLED(CONFIG_NESTED_VIRT) && hvm_funcs.caps.nested_virt;
>  }

Should the field itself perhaps become conditional?

> --- a/xen/arch/x86/include/asm/hvm/nestedhvm.h
> +++ b/xen/arch/x86/include/asm/hvm/nestedhvm.h
> @@ -25,9 +25,21 @@ enum nestedhvm_vmexits {
>  /* Nested HVM on/off per domain */
>  static inline bool nestedhvm_enabled(const struct domain *d)
>  {
> -    return IS_ENABLED(CONFIG_HVM) && (d->options & XEN_DOMCTL_CDF_nested_virt);
> +    return IS_ENABLED(CONFIG_NESTED_VIRT) &&
> +           (d->options & XEN_DOMCTL_CDF_nested_virt);
>  }
>  
> +/* Nested paging */
> +#define NESTEDHVM_PAGEFAULT_DONE       0
> +#define NESTEDHVM_PAGEFAULT_INJECT     1
> +#define NESTEDHVM_PAGEFAULT_L1_ERROR   2
> +#define NESTEDHVM_PAGEFAULT_L0_ERROR   3
> +#define NESTEDHVM_PAGEFAULT_MMIO       4
> +#define NESTEDHVM_PAGEFAULT_RETRY      5
> +#define NESTEDHVM_PAGEFAULT_DIRECT_MMIO 6
> +
> +#ifdef CONFIG_NESTED_VIRT

In a reply to my comment on v1 (Or was it v2? This submission isn't tagged.),
you referred me to the stub nestedhvm_hap_nested_page_fault() using the
constant. However, why would that stub be needed when the sole call site of
the function lives in a conditional using nestedhvm_enabled() (which is
compile-time false when NESTED_VIRT=n)? All you need to make sure is that
the decl remains available. I then wonder for how many of the other stubs
that might be the case as well.

> @@ -199,5 +201,77 @@ int nept_translate_l2ga(struct vcpu *v, paddr_t l2ga,
>                          uint64_t *exit_qual, uint32_t *exit_reason);
>  int nvmx_cpu_up_prepare(unsigned int cpu);
>  void nvmx_cpu_dead(unsigned int cpu);
> +int cf_check nvmx_vmexit_event(struct vcpu *v, const struct x86_event *event);
> +void nvmx_enqueue_n2_exceptions(struct vcpu *v,
> +            unsigned long intr_fields, int error_code, uint8_t source);

Nit: Bad indentation even copied here.

Jan


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] x86/hvm: Add Kconfig option to disable nested virtualization
  2026-02-13 22:02 Stefano Stabellini
  2026-02-13 23:48 ` Demi Marie Obenour
  2026-02-16 10:00 ` Jan Beulich
@ 2026-02-16 10:34 ` Alejandro Vallejo
  2026-02-16 11:01   ` Jan Beulich
  2 siblings, 1 reply; 10+ messages in thread
From: Alejandro Vallejo @ 2026-02-16 10:34 UTC (permalink / raw)
  To: Stefano Stabellini, xen-devel
  Cc: roger.pau, jbeulich, andrew.cooper3, jason.andryuk

On Fri Feb 13, 2026 at 11:02 PM CET, Stefano Stabellini wrote:
> Introduce CONFIG_NESTED_VIRT (default n) to allow nested virtualization
> support to be disabled at build time. This is useful for embedded or
> safety-focused deployments where nested virtualization is not needed,
> reducing code size and attack surface.
>
> When CONFIG_NESTED_VIRT=n, the following source files are excluded:
> - arch/x86/hvm/nestedhvm.c
> - arch/x86/hvm/svm/nestedsvm.c
> - arch/x86/hvm/vmx/vvmx.c
> - arch/x86/mm/nested.c
> - arch/x86/mm/hap/nested_hap.c
> - arch/x86/mm/hap/nested_ept.c
>
> Add inline stubs where needed in headers. Guard assembly code paths
> for nested virt with #ifdef CONFIG_NESTED_VIRT. Move exception
> injection for VMX/SVM instructions to the callers in vmx.c/svm.c to
> avoid header dependency issues in the stubs.
>
> No functional change when CONFIG_NESTED_VIRT=y.
>
> Signed-off-by: Stefano Stabellini <stefano.stabellini@amd.com>
>
> ---
> Changes in v3:
> - Kconfig: Change "depends on AMD_SVM || INTEL_VMX" to "depends on HVM"
> - Kconfig: Remove redundant "default n" line
> - Kconfig: Remove "If unsure, say N." from help text
> - mm/hap/Makefile: Simplify using intermediate nested-y variable:
>     nested-y := nested_hap.o
>     nested-$(CONFIG_INTEL_VMX) += nested_ept.o
>     obj-$(CONFIG_NESTED_VIRT) += $(nested-y)
> - svm/nestedhvm.h: Remove #ifdef CONFIG_NESTED_VIRT stubs, keep only
>   function declarations (the functions are only called from code that
>   is already compiled out when nested virt is disabled)
> - svm/nestedhvm.h: Add CONFIG_NESTED_VIRT guard to nsvm_efer_svm_enabled
>   macro to return false when nested virt is disabled
> - svm/svm.c: Move #UD injection for STGI/CLGI to the caller instead of
>   stub functions, checking nestedhvm_enabled()/nsvm_efer_svm_enabled()
> - svm/svm.c: Mark svm_vmexit_do_vmrun/vmload/vmsave as __maybe_unused
> - svm/svm.c: Remove empty nsvm_vcpu_switch stub (now guarded in asm)
> - svm/entry.S: Add #ifdef CONFIG_NESTED_VIRT guards around nested virt
>   specific code paths
> - vmx/vmx.c: Remove empty nvmx_switch_guest stub (now guarded in asm)
> - vmx/vmx.c: Move nvmx_enqueue_n2_exceptions and nvmx_vmexit_event to
>   vvmx.c where they belong
> - vmx/vvmx.h: Add declarations for nvmx_vmexit_event and
>   nvmx_enqueue_n2_exceptions
> - vmx/vvmx.h: Fix nvmx_msr_read_intercept stub comment
> - vmx/vvmx.h: nvmx_handle_vmx_insn stub returns X86EMUL_EXCEPTION with
>   ASSERT_UNREACHABLE (caller handles injection)
> - vmx/vvmx.h: Convert get_vvmcs macro to inline function in stubs
> - vmx/entry.S: Add #ifdef CONFIG_NESTED_VIRT guard around nvmx_switch_guest
> - nestedhvm.h: Convert macro stubs to proper inline functions
> ---
>  xen/arch/x86/hvm/Kconfig                 |  7 +++
>  xen/arch/x86/hvm/Makefile                |  2 +-
>  xen/arch/x86/hvm/svm/Makefile            |  2 +-
>  xen/arch/x86/hvm/svm/entry.S             |  4 ++
>  xen/arch/x86/hvm/svm/nestedhvm.h         |  2 +-
>  xen/arch/x86/hvm/svm/svm.c               | 18 ++++--
>  xen/arch/x86/hvm/vmx/Makefile            |  2 +-
>  xen/arch/x86/hvm/vmx/entry.S             |  2 +
>  xen/arch/x86/hvm/vmx/vmx.c               | 31 +---------
>  xen/arch/x86/hvm/vmx/vvmx.c              | 26 +++++++++
>  xen/arch/x86/include/asm/hvm/hvm.h       |  2 +-
>  xen/arch/x86/include/asm/hvm/nestedhvm.h | 64 +++++++++++++++++---
>  xen/arch/x86/include/asm/hvm/vmx/vvmx.h  | 74 ++++++++++++++++++++++++
>  xen/arch/x86/mm/Makefile                 |  2 +-
>  xen/arch/x86/mm/hap/Makefile             |  5 +-
>  xen/arch/x86/mm/p2m.h                    |  6 ++
>  xen/arch/x86/sysctl.c                    |  2 +
>  xen/include/public/sysctl.h              |  4 +-
>  18 files changed, 204 insertions(+), 51 deletions(-)
>
> diff --git a/xen/arch/x86/hvm/Kconfig b/xen/arch/x86/hvm/Kconfig
> index f32bf5cbb7..af661385b5 100644
> --- a/xen/arch/x86/hvm/Kconfig
> +++ b/xen/arch/x86/hvm/Kconfig
> @@ -92,4 +92,11 @@ config MEM_SHARING
>  	bool "Xen memory sharing support (UNSUPPORTED)" if UNSUPPORTED
>  	depends on INTEL_VMX
>  
> +config NESTED_VIRT
> +	bool "Nested virtualization support"
> +	depends on HVM
> +	help
> +	  Enable nested virtualization, allowing guests to run their own
> +	  hypervisors. This requires hardware support.

nit: If we state above "allowing HVM guests..." rather than plain "guests" we can
then get rid of the "This requires hardware support" line. What you probably
meant is that this is HVM-only and we don't allow PV nesting.

"This requires hardware support" makes me (the user) think my hardware needs
something special to support nesting, when in reality I just need HVM support.

> +
>  endif
> diff --git a/xen/arch/x86/hvm/Makefile b/xen/arch/x86/hvm/Makefile
> index f34fb03934..b8a0a68624 100644
> --- a/xen/arch/x86/hvm/Makefile
> +++ b/xen/arch/x86/hvm/Makefile
> @@ -18,7 +18,7 @@ obj-y += irq.o
>  obj-y += mmio.o
>  obj-$(CONFIG_VM_EVENT) += monitor.o
>  obj-y += mtrr.o
> -obj-y += nestedhvm.o
> +obj-$(CONFIG_NESTED_VIRT) += nestedhvm.o
>  obj-y += pmtimer.o
>  obj-y += quirks.o
>  obj-y += rtc.o
> diff --git a/xen/arch/x86/hvm/svm/Makefile b/xen/arch/x86/hvm/svm/Makefile
> index 8a072cafd5..92418e3444 100644
> --- a/xen/arch/x86/hvm/svm/Makefile
> +++ b/xen/arch/x86/hvm/svm/Makefile
> @@ -2,6 +2,6 @@ obj-y += asid.o
>  obj-y += emulate.o
>  obj-bin-y += entry.o
>  obj-y += intr.o
> -obj-y += nestedsvm.o
> +obj-$(CONFIG_NESTED_VIRT) += nestedsvm.o
>  obj-y += svm.o
>  obj-y += vmcb.o
> diff --git a/xen/arch/x86/hvm/svm/entry.S b/xen/arch/x86/hvm/svm/entry.S
> index af8db23b03..7964c80750 100644
> --- a/xen/arch/x86/hvm/svm/entry.S
> +++ b/xen/arch/x86/hvm/svm/entry.S
> @@ -28,7 +28,9 @@ FUNC(svm_asm_do_resume)
>          GET_CURRENT(bx)
>  .Lsvm_do_resume:
>          call svm_intr_assist
> +#ifdef CONFIG_NESTED_VIRT
>          call nsvm_vcpu_switch
> +#endif
>          ASSERT_NOT_IN_ATOMIC
>  
>          mov  VCPU_processor(%rbx),%eax
> @@ -39,6 +41,7 @@ FUNC(svm_asm_do_resume)
>          cmp  %ecx,(%rdx,%rax,1)
>          jne  .Lsvm_process_softirqs
>  
> +#ifdef CONFIG_NESTED_VIRT
>          cmp  %cl,VCPU_nsvm_hap_enabled(%rbx)
>  UNLIKELY_START(ne, nsvm_hap)
>          cmp  %rcx,VCPU_nhvm_p2m(%rbx)
> @@ -52,6 +55,7 @@ UNLIKELY_START(ne, nsvm_hap)
>          sti
>          jmp  .Lsvm_do_resume
>  __UNLIKELY_END(nsvm_hap)
> +#endif
>  
>          call svm_vmenter_helper
>  
> diff --git a/xen/arch/x86/hvm/svm/nestedhvm.h b/xen/arch/x86/hvm/svm/nestedhvm.h
> index 9bfed5ffd7..5cb85410f8 100644
> --- a/xen/arch/x86/hvm/svm/nestedhvm.h
> +++ b/xen/arch/x86/hvm/svm/nestedhvm.h
> @@ -24,7 +24,7 @@
>  
>  /* True when l1 guest enabled SVM in EFER */
>  #define nsvm_efer_svm_enabled(v) \
> -    (!!((v)->arch.hvm.guest_efer & EFER_SVME))
> +    (IS_ENABLED(CONFIG_NESTED_VIRT) && ((v)->arch.hvm.guest_efer & EFER_SVME))
>  
>  int nestedsvm_vmcb_map(struct vcpu *v, uint64_t vmcbaddr);
>  void nestedsvm_vmexit_defer(struct vcpu *v,
> diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
> index 18ba837738..2cabc89fb5 100644
> --- a/xen/arch/x86/hvm/svm/svm.c
> +++ b/xen/arch/x86/hvm/svm/svm.c
> @@ -2165,7 +2165,7 @@ static void svm_vmexit_do_pause(struct cpu_user_regs *regs)
>      vcpu_yield();
>  }
>  
> -static void
> +static void __maybe_unused
>  svm_vmexit_do_vmrun(struct cpu_user_regs *regs,
>                      struct vcpu *v, uint64_t vmcbaddr)
>  {
> @@ -2211,7 +2211,7 @@ nsvm_get_nvmcb_page(struct vcpu *v, uint64_t vmcbaddr)
>      return  page;
>  }
>  
> -static void
> +static void __maybe_unused
>  svm_vmexit_do_vmload(struct vmcb_struct *vmcb,
>                       struct cpu_user_regs *regs,
>                       struct vcpu *v, uint64_t vmcbaddr)
> @@ -2246,7 +2246,7 @@ svm_vmexit_do_vmload(struct vmcb_struct *vmcb,
>      __update_guest_eip(regs, inst_len);
>  }
>  
> -static void
> +static void __maybe_unused
>  svm_vmexit_do_vmsave(struct vmcb_struct *vmcb,
>                       struct cpu_user_regs *regs,
>                       struct vcpu *v, uint64_t vmcbaddr)
> @@ -2465,6 +2465,7 @@ static struct hvm_function_table __initdata_cf_clobber svm_function_table = {
>      .set_rdtsc_exiting    = svm_set_rdtsc_exiting,
>      .get_insn_bytes       = svm_get_insn_bytes,
>  
> +#ifdef CONFIG_NESTED_VIRT
>      .nhvm_vcpu_initialise = nsvm_vcpu_initialise,
>      .nhvm_vcpu_destroy = nsvm_vcpu_destroy,
>      .nhvm_vcpu_reset = nsvm_vcpu_reset,
> @@ -2474,6 +2475,7 @@ static struct hvm_function_table __initdata_cf_clobber svm_function_table = {
>      .nhvm_vmcx_hap_enabled = nsvm_vmcb_hap_enabled,
>      .nhvm_intr_blocked = nsvm_intr_blocked,
>      .nhvm_hap_walk_L1_p2m = nsvm_hap_walk_L1_p2m,
> +#endif
>  
>      .get_reg = svm_get_reg,
>      .set_reg = svm_set_reg,
> @@ -3011,10 +3013,16 @@ void asmlinkage svm_vmexit_handler(void)
>          svm_vmexit_do_vmsave(vmcb, regs, v, regs->rax);
>          break;
>      case VMEXIT_STGI:
> -        svm_vmexit_do_stgi(regs, v);
> +        if ( !nestedhvm_enabled(v->domain) )
> +            hvm_inject_hw_exception(X86_EXC_UD, X86_EVENT_NO_EC);
> +        else
> +            svm_vmexit_do_stgi(regs, v);
>          break;
>      case VMEXIT_CLGI:
> -        svm_vmexit_do_clgi(regs, v);
> +        if ( !nsvm_efer_svm_enabled(v) )
> +            hvm_inject_hw_exception(X86_EXC_UD, X86_EVENT_NO_EC);
> +        else
> +            svm_vmexit_do_clgi(regs, v);
>          break;

nit: For readability I'd consider reversing the polarity and putting the enabled
cases in the first branch.

>  
>      case VMEXIT_XSETBV:
> diff --git a/xen/arch/x86/hvm/vmx/Makefile b/xen/arch/x86/hvm/vmx/Makefile
> index 04a29ce59d..902564b3e2 100644
> --- a/xen/arch/x86/hvm/vmx/Makefile
> +++ b/xen/arch/x86/hvm/vmx/Makefile
> @@ -3,4 +3,4 @@ obj-y += intr.o
>  obj-y += realmode.o
>  obj-y += vmcs.o
>  obj-y += vmx.o
> -obj-y += vvmx.o
> +obj-$(CONFIG_NESTED_VIRT) += vvmx.o
> diff --git a/xen/arch/x86/hvm/vmx/entry.S b/xen/arch/x86/hvm/vmx/entry.S
> index 2bfee715b3..4d62efddf4 100644
> --- a/xen/arch/x86/hvm/vmx/entry.S
> +++ b/xen/arch/x86/hvm/vmx/entry.S
> @@ -83,7 +83,9 @@ FUNC(vmx_asm_vmexit_handler)
>  
>  .Lvmx_do_vmentry:
>          call vmx_intr_assist
> +#ifdef CONFIG_NESTED_VIRT
>          call nvmx_switch_guest
> +#endif
>          ASSERT_NOT_IN_ATOMIC
>  
>          mov  VCPU_processor(%rbx),%eax
> diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
> index 82c55f49ae..4e3c8018d2 100644
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -2014,33 +2014,6 @@ static void cf_check vmx_update_guest_efer(struct vcpu *v)
>          vmx_set_msr_intercept(v, MSR_EFER, VMX_MSR_R);
>  }
>  
> -static void nvmx_enqueue_n2_exceptions(struct vcpu *v,
> -            unsigned long intr_fields, int error_code, uint8_t source)
> -{
> -    struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
> -
> -    if ( !(nvmx->intr.intr_info & INTR_INFO_VALID_MASK) ) {
> -        /* enqueue the exception till the VMCS switch back to L1 */
> -        nvmx->intr.intr_info = intr_fields;
> -        nvmx->intr.error_code = error_code;
> -        nvmx->intr.source = source;
> -        vcpu_nestedhvm(v).nv_vmexit_pending = 1;
> -        return;
> -    }
> -    else
> -        gdprintk(XENLOG_ERR, "Double Fault on Nested Guest: exception %lx %x"
> -                 "on %lx %x\n", intr_fields, error_code,
> -                 nvmx->intr.intr_info, nvmx->intr.error_code);
> -}
> -
> -static int cf_check nvmx_vmexit_event(
> -    struct vcpu *v, const struct x86_event *event)
> -{
> -    nvmx_enqueue_n2_exceptions(v, event->vector, event->error_code,
> -                               hvm_intsrc_none);
> -    return NESTEDHVM_VMEXIT_DONE;
> -}
> -
>  static void __vmx_inject_exception(int trap, int type, int error_code)
>  {
>      unsigned long intr_fields;
> @@ -2933,6 +2906,7 @@ static struct hvm_function_table __initdata_cf_clobber vmx_function_table = {
>      .handle_cd            = vmx_handle_cd,
>      .set_info_guest       = vmx_set_info_guest,
>      .set_rdtsc_exiting    = vmx_set_rdtsc_exiting,
> +#ifdef CONFIG_NESTED_VIRT
>      .nhvm_vcpu_initialise = nvmx_vcpu_initialise,
>      .nhvm_vcpu_destroy    = nvmx_vcpu_destroy,
>      .nhvm_vcpu_reset      = nvmx_vcpu_reset,
> @@ -2942,8 +2916,9 @@ static struct hvm_function_table __initdata_cf_clobber vmx_function_table = {
>      .nhvm_vcpu_vmexit_event = nvmx_vmexit_event,
>      .nhvm_intr_blocked    = nvmx_intr_blocked,
>      .nhvm_domain_relinquish_resources = nvmx_domain_relinquish_resources,
> -    .update_vlapic_mode = vmx_vlapic_msr_changed,
>      .nhvm_hap_walk_L1_p2m = nvmx_hap_walk_L1_p2m,
> +#endif
> +    .update_vlapic_mode = vmx_vlapic_msr_changed,
>  #ifdef CONFIG_VM_EVENT
>      .enable_msr_interception = vmx_enable_msr_interception,
>  #endif
> diff --git a/xen/arch/x86/hvm/vmx/vvmx.c b/xen/arch/x86/hvm/vmx/vvmx.c
> index 38952f0696..2bb42678c5 100644
> --- a/xen/arch/x86/hvm/vmx/vvmx.c
> +++ b/xen/arch/x86/hvm/vmx/vvmx.c
> @@ -2821,6 +2821,32 @@ void nvmx_set_cr_read_shadow(struct vcpu *v, unsigned int cr)
>      __vmwrite(read_shadow_field, v->arch.hvm.nvcpu.guest_cr[cr]);
>  }
>  
> +void nvmx_enqueue_n2_exceptions(struct vcpu *v,
> +            unsigned long intr_fields, int error_code, uint8_t source)
> +{
> +    struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
> +
> +    if ( !(nvmx->intr.intr_info & INTR_INFO_VALID_MASK) ) {
> +        /* enqueue the exception till the VMCS switch back to L1 */
> +        nvmx->intr.intr_info = intr_fields;
> +        nvmx->intr.error_code = error_code;
> +        nvmx->intr.source = source;
> +        vcpu_nestedhvm(v).nv_vmexit_pending = 1;
> +        return;
> +    }
> +    else
> +        gdprintk(XENLOG_ERR, "Double Fault on Nested Guest: exception %lx %x"
> +                 "on %lx %x\n", intr_fields, error_code,
> +                 nvmx->intr.intr_info, nvmx->intr.error_code);
> +}
> +
> +int cf_check nvmx_vmexit_event(struct vcpu *v, const struct x86_event *event)
> +{
> +    nvmx_enqueue_n2_exceptions(v, event->vector, event->error_code,
> +                               hvm_intsrc_none);
> +    return NESTEDHVM_VMEXIT_DONE;
> +}
> +
>  void __init start_nested_vmx(struct hvm_function_table *hvm_function_table)
>  {
>      /* TODO: Require hardware support before enabling nested virt */
> diff --git a/xen/arch/x86/include/asm/hvm/hvm.h b/xen/arch/x86/include/asm/hvm/hvm.h
> index 7d9774df59..536a38b450 100644
> --- a/xen/arch/x86/include/asm/hvm/hvm.h
> +++ b/xen/arch/x86/include/asm/hvm/hvm.h
> @@ -711,7 +711,7 @@ static inline bool hvm_altp2m_supported(void)
>  /* Returns true if we have the minimum hardware requirements for nested virt */
>  static inline bool hvm_nested_virt_supported(void)
>  {
> -    return hvm_funcs.caps.nested_virt;
> +    return IS_ENABLED(CONFIG_NESTED_VIRT) && hvm_funcs.caps.nested_virt;
>  }
>  
>  #ifdef CONFIG_ALTP2M
> diff --git a/xen/arch/x86/include/asm/hvm/nestedhvm.h b/xen/arch/x86/include/asm/hvm/nestedhvm.h
> index ea2c1bc328..2f8209271a 100644
> --- a/xen/arch/x86/include/asm/hvm/nestedhvm.h
> +++ b/xen/arch/x86/include/asm/hvm/nestedhvm.h
> @@ -25,9 +25,21 @@ enum nestedhvm_vmexits {
>  /* Nested HVM on/off per domain */
>  static inline bool nestedhvm_enabled(const struct domain *d)
>  {
> -    return IS_ENABLED(CONFIG_HVM) && (d->options & XEN_DOMCTL_CDF_nested_virt);
> +    return IS_ENABLED(CONFIG_NESTED_VIRT) &&
> +           (d->options & XEN_DOMCTL_CDF_nested_virt);
>  }
>  
> +/* Nested paging */
> +#define NESTEDHVM_PAGEFAULT_DONE       0
> +#define NESTEDHVM_PAGEFAULT_INJECT     1
> +#define NESTEDHVM_PAGEFAULT_L1_ERROR   2
> +#define NESTEDHVM_PAGEFAULT_L0_ERROR   3
> +#define NESTEDHVM_PAGEFAULT_MMIO       4
> +#define NESTEDHVM_PAGEFAULT_RETRY      5
> +#define NESTEDHVM_PAGEFAULT_DIRECT_MMIO 6
> +
> +#ifdef CONFIG_NESTED_VIRT
> +
>  /* Nested VCPU */
>  int nestedhvm_vcpu_initialise(struct vcpu *v);
>  void nestedhvm_vcpu_destroy(struct vcpu *v);
> @@ -38,14 +50,6 @@ bool nestedhvm_vcpu_in_guestmode(struct vcpu *v);
>  #define nestedhvm_vcpu_exit_guestmode(v)  \
>      vcpu_nestedhvm(v).nv_guestmode = 0
>  
> -/* Nested paging */
> -#define NESTEDHVM_PAGEFAULT_DONE       0
> -#define NESTEDHVM_PAGEFAULT_INJECT     1
> -#define NESTEDHVM_PAGEFAULT_L1_ERROR   2
> -#define NESTEDHVM_PAGEFAULT_L0_ERROR   3
> -#define NESTEDHVM_PAGEFAULT_MMIO       4
> -#define NESTEDHVM_PAGEFAULT_RETRY      5
> -#define NESTEDHVM_PAGEFAULT_DIRECT_MMIO 6
>  int nestedhvm_hap_nested_page_fault(struct vcpu *v, paddr_t *L2_gpa,
>                                      struct npfec npfec);
>  
> @@ -59,6 +63,48 @@ unsigned long *nestedhvm_vcpu_iomap_get(bool ioport_80, bool ioport_ed);
>  
>  void nestedhvm_vmcx_flushtlb(struct p2m_domain *p2m);
>  
> +#else /* !CONFIG_NESTED_VIRT */

There are a lot more stubs than needed here.

> +
> +static inline int nestedhvm_vcpu_initialise(struct vcpu *v)
> +{
> +    ASSERT_UNREACHABLE();
> +    return -EOPNOTSUPP;
> +}

Can remove.

> +static inline void nestedhvm_vcpu_destroy(struct vcpu *v) { }

Must stay.

> +static inline void nestedhvm_vcpu_reset(struct vcpu *v)
> +{
> +    ASSERT_UNREACHABLE();
> +}

Can remove.

> +static inline bool nestedhvm_vcpu_in_guestmode(struct vcpu *v) { return false; }

Must stay.

> +static inline int nestedhvm_hap_nested_page_fault(struct vcpu *v, paddr_t *L2_gpa,
> +                                                  struct npfec npfec)
> +{
> +    ASSERT_UNREACHABLE();
> +    return NESTEDHVM_PAGEFAULT_L0_ERROR;
> +}

Can remove

> +static inline void nestedhvm_vcpu_enter_guestmode(struct vcpu *v)
> +{
> +    ASSERT_UNREACHABLE();
> +}
> +static inline void nestedhvm_vcpu_exit_guestmode(struct vcpu *v)
> +{
> +    ASSERT_UNREACHABLE();
> +}

These two can be removed. It might be good to keep the real macros hidden under
CONFIG_NESTED_VIRT though to ensure they can't be called.

> +static inline bool nestedhvm_paging_mode_hap(struct vcpu *v)
> +{
> +    return false;
> +}

This can be removed with a cleaner IS_ENABLED() check in nhvm_vmcx_hap_enabled()

> +static inline bool nestedhvm_vmswitch_in_progress(struct vcpu *v)
> +{
> +    return false;
> +}

Would be cleaner with an IS_ENABLED() check in the macro itself, IMO.

> +static inline void nestedhvm_vmcx_flushtlb(struct p2m_domain *p2m)
> +{
> +    ASSERT_UNREACHABLE();
> +}

Can remove.

> +
> +#endif /* CONFIG_NESTED_VIRT */
> +
>  static inline bool nestedhvm_is_n2(struct vcpu *v)
>  {
>      if ( !nestedhvm_enabled(v->domain) ||
> diff --git a/xen/arch/x86/include/asm/hvm/vmx/vvmx.h b/xen/arch/x86/include/asm/hvm/vmx/vvmx.h
> index da10d3fa96..d0c1ae29f6 100644
> --- a/xen/arch/x86/include/asm/hvm/vmx/vvmx.h
> +++ b/xen/arch/x86/include/asm/hvm/vmx/vvmx.h

Most stubs in this header aren't needed.

You only need nvmx_cpu_up_prepare(), nvmx_cpu_dead(), nvmx_msr_read_intercept()
and nvmx_handle_vmx_insn(). There's also a bug in that last one...

> @@ -73,6 +73,8 @@ union vmx_inst_info {
>      u32 word;
>  };
>  
> +#ifdef CONFIG_NESTED_VIRT
> +
>  int cf_check nvmx_vcpu_initialise(struct vcpu *v);
>  void cf_check nvmx_vcpu_destroy(struct vcpu *v);
>  int cf_check nvmx_vcpu_reset(struct vcpu *v);
> @@ -199,5 +201,77 @@ int nept_translate_l2ga(struct vcpu *v, paddr_t l2ga,
>                          uint64_t *exit_qual, uint32_t *exit_reason);
>  int nvmx_cpu_up_prepare(unsigned int cpu);
>  void nvmx_cpu_dead(unsigned int cpu);
> +int cf_check nvmx_vmexit_event(struct vcpu *v, const struct x86_event *event);
> +void nvmx_enqueue_n2_exceptions(struct vcpu *v,
> +            unsigned long intr_fields, int error_code, uint8_t source);
> +
> +#else /* !CONFIG_NESTED_VIRT */
> +
> +static inline void nvmx_update_exec_control(struct vcpu *v, u32 value)
> +{
> +    ASSERT_UNREACHABLE();
> +}
> +static inline void nvmx_update_secondary_exec_control(struct vcpu *v,
> +                                                      unsigned long value)
> +{
> +    ASSERT_UNREACHABLE();
> +}
> +static inline void nvmx_update_exception_bitmap(struct vcpu *v,
> +                                                unsigned long value)
> +{
> +    ASSERT_UNREACHABLE();
> +}
> +static inline u64 nvmx_get_tsc_offset(struct vcpu *v)
> +{
> +    ASSERT_UNREACHABLE();
> +    return 0;
> +}
> +static inline void nvmx_set_cr_read_shadow(struct vcpu *v, unsigned int cr)
> +{
> +    ASSERT_UNREACHABLE();
> +}
> +static inline bool nvmx_intercepts_exception(struct vcpu *v, unsigned int vector,
> +                                             int error_code)
> +{
> +    ASSERT_UNREACHABLE();
> +    return false;
> +}
> +static inline int nvmx_n2_vmexit_handler(struct cpu_user_regs *regs,
> +                                         unsigned int exit_reason)
> +{
> +    ASSERT_UNREACHABLE();
> +    return 0;
> +}
> +static inline void nvmx_idtv_handling(void)
> +{
> +    ASSERT_UNREACHABLE();
> +}
> +static inline int nvmx_msr_read_intercept(unsigned int msr, u64 *msr_content)
> +{
> +    /* return 0 to trigger #GP */
> +    return 0;
> +}
> +static inline int nvmx_handle_vmx_insn(struct cpu_user_regs *regs,
> +                                       unsigned int exit_reason)
> +{
> +    ASSERT_UNREACHABLE();
> +    return X86EMUL_EXCEPTION;
> +}

... here. This is perfectly reachable and will cause a hypervisor crash should
an L1 try to probe the VMX-family of instructions. Even on release this would
behave very oddly because you're missing injecting #UD. This stub should be:

        hvm_inject_hw_exception(X86_EXC_UD, X86_EVENT_NO_EC);
        return X86EMUL_EXCEPTION;

> +static inline int nvmx_cpu_up_prepare(unsigned int cpu) { return 0; }
> +static inline void nvmx_cpu_dead(unsigned int cpu) { }
> +static inline void nvmx_enqueue_n2_exceptions(struct vcpu *v,
> +            unsigned long intr_fields, int error_code, uint8_t source)
> +{
> +    ASSERT_UNREACHABLE();
> +}
> +
> +static inline u64 get_vvmcs(const struct vcpu *vcpu, u32 encoding)
> +{
> +    ASSERT_UNREACHABLE();
> +    return 0;
> +}
> +
> +#endif /* CONFIG_NESTED_VIRT */
> +
>  #endif /* __ASM_X86_HVM_VVMX_H__ */
>  
> diff --git a/xen/arch/x86/mm/Makefile b/xen/arch/x86/mm/Makefile
> index 960f6e8409..aa15811c2e 100644
> --- a/xen/arch/x86/mm/Makefile
> +++ b/xen/arch/x86/mm/Makefile
> @@ -7,7 +7,7 @@ obj-$(CONFIG_SHADOW_PAGING) += guest_walk_4.o
>  obj-$(CONFIG_VM_EVENT) += mem_access.o
>  obj-$(CONFIG_MEM_PAGING) += mem_paging.o
>  obj-$(CONFIG_MEM_SHARING) += mem_sharing.o
> -obj-$(CONFIG_HVM) += nested.o
> +obj-$(CONFIG_NESTED_VIRT) += nested.o
>  obj-$(CONFIG_HVM) += p2m.o
>  obj-y += p2m-basic.o
>  obj-$(CONFIG_INTEL_VMX) += p2m-ept.o
> diff --git a/xen/arch/x86/mm/hap/Makefile b/xen/arch/x86/mm/hap/Makefile
> index 67c29b2162..efdc91ea82 100644
> --- a/xen/arch/x86/mm/hap/Makefile
> +++ b/xen/arch/x86/mm/hap/Makefile
> @@ -2,5 +2,6 @@ obj-y += hap.o
>  obj-y += guest_walk_2.o
>  obj-y += guest_walk_3.o
>  obj-y += guest_walk_4.o
> -obj-y += nested_hap.o
> -obj-$(CONFIG_INTEL_VMX) += nested_ept.o
> +nested-y := nested_hap.o
> +nested-$(CONFIG_INTEL_VMX) += nested_ept.o
> +obj-$(CONFIG_NESTED_VIRT) += $(nested-y)

Why not use plain filter?

	-obj-y += nested_hap.o
	+obj-$(CONFIG_NESTED_VIRT) += nested_hap.o
	-obj-$(CONFIG_INTEL_VMX) += nested_ept.o
	+obj-$(filter $(CONFIG_NESTED_VIRT),$(CONFIG_INTEL_VMX)) += nested_ept.o

> diff --git a/xen/arch/x86/mm/p2m.h b/xen/arch/x86/mm/p2m.h
> index 635f5a7f45..63808dddcc 100644
> --- a/xen/arch/x86/mm/p2m.h
> +++ b/xen/arch/x86/mm/p2m.h
> @@ -25,9 +25,15 @@ void p2m_teardown_altp2m(struct domain *d);
>  void p2m_flush_table_locked(struct p2m_domain *p2m);
>  int __must_check p2m_remove_entry(struct p2m_domain *p2m, gfn_t gfn, mfn_t mfn,
>                                    unsigned int page_order);
> +#ifdef CONFIG_NESTED_VIRT
>  void p2m_nestedp2m_init(struct p2m_domain *p2m);
>  int p2m_init_nestedp2m(struct domain *d);
>  void p2m_teardown_nestedp2m(struct domain *d);
> +#else
> +static inline void p2m_nestedp2m_init(struct p2m_domain *p2m) { }
> +static inline int p2m_init_nestedp2m(struct domain *d) { return 0; }
> +static inline void p2m_teardown_nestedp2m(struct domain *d) { }
> +#endif

Seeing how there's a single callsite I'd rather see those callsites check for
IS_ENABLED(), I think.

>  
>  int ept_p2m_init(struct p2m_domain *p2m);
>  void ept_p2m_uninit(struct p2m_domain *p2m);
> diff --git a/xen/arch/x86/sysctl.c b/xen/arch/x86/sysctl.c
> index 1b04947516..b1d865e1c8 100644
> --- a/xen/arch/x86/sysctl.c
> +++ b/xen/arch/x86/sysctl.c
> @@ -103,6 +103,8 @@ void arch_do_physinfo(struct xen_sysctl_physinfo *pi)
>          pi->capabilities |= XEN_SYSCTL_PHYSCAP_hap;
>      if ( IS_ENABLED(CONFIG_SHADOW_PAGING) )
>          pi->capabilities |= XEN_SYSCTL_PHYSCAP_shadow;
> +    if ( hvm_nested_virt_supported() )
> +        pi->capabilities |= XEN_SYSCTL_PHYSCAP_nestedhvm;
>  }
>  
>  long arch_do_sysctl(
> diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h
> index 66c9b65465..b4bd1dd7b2 100644
> --- a/xen/include/public/sysctl.h
> +++ b/xen/include/public/sysctl.h
> @@ -100,9 +100,11 @@ struct xen_sysctl_tbuf_op {
>  /* Xen supports the Grant v1 and/or v2 ABIs. */
>  #define XEN_SYSCTL_PHYSCAP_gnttab_v1     (1u << 8)
>  #define XEN_SYSCTL_PHYSCAP_gnttab_v2     (1u << 9)
> +/* The platform supports nested HVM. */
> +#define XEN_SYSCTL_PHYSCAP_nestedhvm     (1u << 10)
>  
>  /* Max XEN_SYSCTL_PHYSCAP_* constant.  Used for ABI checking. */
> -#define XEN_SYSCTL_PHYSCAP_MAX XEN_SYSCTL_PHYSCAP_gnttab_v2
> +#define XEN_SYSCTL_PHYSCAP_MAX XEN_SYSCTL_PHYSCAP_nestedhvm
>  
>  #if defined(__arm__) || defined(__aarch64__)
>  #define XEN_SYSCTL_PHYSCAP_ARM_SVE_MASK  (0x1FU)

Cheers,
Alejandro



^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] x86/hvm: Add Kconfig option to disable nested virtualization
  2026-02-16 10:34 ` Alejandro Vallejo
@ 2026-02-16 11:01   ` Jan Beulich
  2026-02-16 11:28     ` Alejandro Vallejo
  0 siblings, 1 reply; 10+ messages in thread
From: Jan Beulich @ 2026-02-16 11:01 UTC (permalink / raw)
  To: Alejandro Vallejo
  Cc: roger.pau, andrew.cooper3, jason.andryuk, Stefano Stabellini,
	xen-devel

On 16.02.2026 11:34, Alejandro Vallejo wrote:
> On Fri Feb 13, 2026 at 11:02 PM CET, Stefano Stabellini wrote:
>> --- a/xen/arch/x86/hvm/Kconfig
>> +++ b/xen/arch/x86/hvm/Kconfig
>> @@ -92,4 +92,11 @@ config MEM_SHARING
>>  	bool "Xen memory sharing support (UNSUPPORTED)" if UNSUPPORTED
>>  	depends on INTEL_VMX
>>  
>> +config NESTED_VIRT
>> +	bool "Nested virtualization support"
>> +	depends on HVM
>> +	help
>> +	  Enable nested virtualization, allowing guests to run their own
>> +	  hypervisors. This requires hardware support.
> 
> nit: If we state above "allowing HVM guests..." rather than plain "guests" we can
> then get rid of the "This requires hardware support line". What you probably
> meant is that this is HVM-only and we don't allow PV nesting.
> 
> "This requires hardware support" makes me (the user) think my hardware needs
> something special to support nesting, when in reality I just need HVM support.

When replying, I also initially meant to make this broad a statement, but then
went to check: While indeed it ought to be possible to implement nested without
further hw support, both VMX and SVM demand HAP, and SVM additionally demands a
few more advanced features (see start_nested_svm()).

>> --- a/xen/arch/x86/mm/hap/Makefile
>> +++ b/xen/arch/x86/mm/hap/Makefile
>> @@ -2,5 +2,6 @@ obj-y += hap.o
>>  obj-y += guest_walk_2.o
>>  obj-y += guest_walk_3.o
>>  obj-y += guest_walk_4.o
>> -obj-y += nested_hap.o
>> -obj-$(CONFIG_INTEL_VMX) += nested_ept.o
>> +nested-y := nested_hap.o
>> +nested-$(CONFIG_INTEL_VMX) += nested_ept.o
>> +obj-$(CONFIG_NESTED_VIRT) += $(nested-y)
> 
> Why not use plain filter?
> 
> 	-obj-y += nested_hap.o
> 	+obj-$(CONFIG_NESTED_VIRT) += nested_hap.o
> 	-obj-$(CONFIG_INTEL_VMX) += nested_ept.o
> 	+obj-$(filter $(CONFIG_NESTED_VIRT),$(CONFIG_INTEL_VMX)) += nested_ept.o

It may have been like this in v1. Problem being that this leads to long lines,
which doesn't scale very well (and is - imo - harder to read). Especially when
you consider what happens when it's more than two settings that need checking.
The list approach easily scales to about anything (by using as many separate
lists as you need).

Jan


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] x86/hvm: Add Kconfig option to disable nested virtualization
  2026-02-16 11:01   ` Jan Beulich
@ 2026-02-16 11:28     ` Alejandro Vallejo
  0 siblings, 0 replies; 10+ messages in thread
From: Alejandro Vallejo @ 2026-02-16 11:28 UTC (permalink / raw)
  To: Jan Beulich
  Cc: roger.pau, andrew.cooper3, jason.andryuk, Stefano Stabellini,
	xen-devel

On Mon Feb 16, 2026 at 12:01 PM CET, Jan Beulich wrote:
>>> --- a/xen/arch/x86/mm/hap/Makefile
>>> +++ b/xen/arch/x86/mm/hap/Makefile
>>> @@ -2,5 +2,6 @@ obj-y += hap.o
>>>  obj-y += guest_walk_2.o
>>>  obj-y += guest_walk_3.o
>>>  obj-y += guest_walk_4.o
>>> -obj-y += nested_hap.o
>>> -obj-$(CONFIG_INTEL_VMX) += nested_ept.o
>>> +nested-y := nested_hap.o
>>> +nested-$(CONFIG_INTEL_VMX) += nested_ept.o
>>> +obj-$(CONFIG_NESTED_VIRT) += $(nested-y)
>> 
>> Why not use plain filter?
>> 
>> 	-obj-y += nested_hap.o
>> 	+obj-$(CONFIG_NESTED_VIRT) += nested_hap.o
>> 	-obj-$(CONFIG_INTEL_VMX) += nested_ept.o
>> 	+obj-$(filter $(CONFIG_NESTED_VIRT),$(CONFIG_INTEL_VMX)) += nested_ept.o
>
> It may have been like this in v1. Problem being that this leads to long lines,
> which doesn't scale very well (and is - imo - harder to read). Especially when
> you consider what happens when it's more than two settings that need checking.
> The list approach easily scales to about anything (by using as many separate
> lists as you need).
>
> Jan

I'd agree should we need more than 2 settings, long config parameter names
or long filenames, but none of that applies here. It fits neatly within 80
columns and the extra indirection bumps the cognitive load (subjectively
speaking) way more than the single line does. Plus, it takes more vertical
space.

Even then, I'd rather have ifeq on the 3rd and/or 4th parameters and filter
inside, which makes the group stand out much better and doesn't pollute the
global namespace with even more names.

And there's the matter of "filter" being in use very prevalently elsewhere.

My .02, anyway. They are functionally equivalent, after all.

Cheers,
Alejandro


^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2026-02-16 11:29 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-02-06  1:50 [PATCH] x86/hvm: Add Kconfig option to disable nested virtualization Stefano Stabellini
2026-02-06  4:07 ` Demi Marie Obenour
2026-02-06  8:15 ` Roger Pau Monné
2026-02-06 20:52   ` Stefano Stabellini
  -- strict thread matches above, loose matches on Subject: below --
2026-02-13 22:02 Stefano Stabellini
2026-02-13 23:48 ` Demi Marie Obenour
2026-02-16 10:00 ` Jan Beulich
2026-02-16 10:34 ` Alejandro Vallejo
2026-02-16 11:01   ` Jan Beulich
2026-02-16 11:28     ` Alejandro Vallejo

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.