kvm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH kvmtool v2 0/6] arm64: Nested virtualization support
@ 2025-07-25 14:40 Andre Przywara
  2025-07-25 14:40 ` [PATCH kvmtool v2 1/6] Sync kernel UAPI headers with v6.16-rc1 Andre Przywara
                   ` (5 more replies)
  0 siblings, 6 replies; 13+ messages in thread
From: Andre Przywara @ 2025-07-25 14:40 UTC (permalink / raw)
  To: Will Deacon, Julien Thierry, Marc Zyngier; +Cc: kvm, kvmarm, Alexandru Elisei

v2 of the nested virt support series, just adding three patches from
Marc's repo, to complete the nested virt experience.
Marc, I added some commit messages to your patches; please have a look at
whether they vaguely make sense. Thanks!
========================================================

Thanks to the imperturbable efforts from Marc, arm64 support for nested
virtualization has now reached the mainline kernel, which means the
respective kvmtool support should now be ready as well.

Patch 1 updates the kernel headers, to get the new EL2 capability, and
the VGIC device control to set up the maintenance IRQ.
Patch 2 introduces the new "--nested" command line option, to let the
VCPUs start in EL2. To allow KVM guests to run inside such a guest, we
also need VGIC support, which patch 3 provides by setting the maintenance
IRQ.
Patches 4 to 6 are picked from Marc's repo, and allow setting the arch
timer counter offset, enabling non-VHE guests (at the cost of losing
recursive nested virtualisation), and advertising the virtual EL2 timer
IRQ.

Tested on the FVP (with a good deal of patience), and on some commercial
(non-fruity) hardware, down to a guest's guest's guest.

Cheers,
Andre

Andre Przywara (3):
  Sync kernel UAPI headers with v6.16-rc1
  arm64: Initial nested virt support
  arm64: nested: add support for setting maintenance IRQ

Marc Zyngier (3):
  arm64: Add counter offset control
  arm64: add FEAT_E2H0 support (TBC)
  arm64: Generate HYP timer interrupt specifiers

 arm64/arm-cpu.c                     |  7 ++-
 arm64/fdt.c                         |  5 +-
 arm64/gic.c                         | 21 +++++++-
 arm64/include/asm/kvm.h             | 23 +++++++--
 arm64/include/kvm/gic.h             |  2 +-
 arm64/include/kvm/kvm-config-arch.h | 11 ++++-
 arm64/include/kvm/timer.h           |  2 +-
 arm64/kvm-cpu.c                     | 14 +++++-
 arm64/kvm.c                         | 17 +++++++
 arm64/timer.c                       | 29 +++++------
 include/linux/kvm.h                 |  5 ++
 include/linux/virtio_net.h          | 13 +++++
 include/linux/virtio_pci.h          |  1 +
 riscv/include/asm/kvm.h             |  2 +
 x86/include/asm/kvm.h               | 75 +++++++++++++++++++++++++++++
 15 files changed, 196 insertions(+), 31 deletions(-)

-- 
2.25.1


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH kvmtool v2 1/6] Sync kernel UAPI headers with v6.16-rc1
  2025-07-25 14:40 [PATCH kvmtool v2 0/6] arm64: Nested virtualization support Andre Przywara
@ 2025-07-25 14:40 ` Andre Przywara
  2025-07-25 14:40 ` [PATCH kvmtool v2 2/6] arm64: Initial nested virt support Andre Przywara
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 13+ messages in thread
From: Andre Przywara @ 2025-07-25 14:40 UTC (permalink / raw)
  To: Will Deacon, Julien Thierry, Marc Zyngier; +Cc: kvm, kvmarm, Alexandru Elisei

Needed for ARM nested virt support.
Generated using util/update_headers.sh.

Signed-off-by: Andre Przywara <andre.przywara@arm.com>
---
 arm64/include/asm/kvm.h    | 23 ++++++++++--
 include/linux/kvm.h        |  5 +++
 include/linux/virtio_net.h | 13 +++++++
 include/linux/virtio_pci.h |  1 +
 riscv/include/asm/kvm.h    |  2 +
 x86/include/asm/kvm.h      | 75 ++++++++++++++++++++++++++++++++++++++
 6 files changed, 115 insertions(+), 4 deletions(-)

diff --git a/arm64/include/asm/kvm.h b/arm64/include/asm/kvm.h
index 568bf858f..ed5f38926 100644
--- a/arm64/include/asm/kvm.h
+++ b/arm64/include/asm/kvm.h
@@ -105,6 +105,7 @@ struct kvm_regs {
 #define KVM_ARM_VCPU_PTRAUTH_ADDRESS	5 /* VCPU uses address authentication */
 #define KVM_ARM_VCPU_PTRAUTH_GENERIC	6 /* VCPU uses generic authentication */
 #define KVM_ARM_VCPU_HAS_EL2		7 /* Support nested virtualization */
+#define KVM_ARM_VCPU_HAS_EL2_E2H0	8 /* Limit NV support to E2H RES0 */
 
 struct kvm_vcpu_init {
 	__u32 target;
@@ -371,6 +372,7 @@ enum {
 #endif
 };
 
+/* Vendor hyper call function numbers 0-63 */
 #define KVM_REG_ARM_VENDOR_HYP_BMAP		KVM_REG_ARM_FW_FEAT_BMAP_REG(2)
 
 enum {
@@ -381,6 +383,17 @@ enum {
 #endif
 };
 
+/* Vendor hyper call function numbers 64-127 */
+#define KVM_REG_ARM_VENDOR_HYP_BMAP_2		KVM_REG_ARM_FW_FEAT_BMAP_REG(3)
+
+enum {
+	KVM_REG_ARM_VENDOR_HYP_BIT_DISCOVER_IMPL_VER	= 0,
+	KVM_REG_ARM_VENDOR_HYP_BIT_DISCOVER_IMPL_CPUS	= 1,
+#ifdef __KERNEL__
+	KVM_REG_ARM_VENDOR_HYP_BMAP_2_BIT_COUNT,
+#endif
+};
+
 /* Device Control API on vm fd */
 #define KVM_ARM_VM_SMCCC_CTRL		0
 #define   KVM_ARM_VM_SMCCC_FILTER	0
@@ -403,6 +416,7 @@ enum {
 #define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
 #define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO  7
 #define KVM_DEV_ARM_VGIC_GRP_ITS_REGS 8
+#define KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ  9
 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT	10
 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
 			(0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
@@ -417,10 +431,11 @@ enum {
 
 /* Device Control API on vcpu fd */
 #define KVM_ARM_VCPU_PMU_V3_CTRL	0
-#define   KVM_ARM_VCPU_PMU_V3_IRQ	0
-#define   KVM_ARM_VCPU_PMU_V3_INIT	1
-#define   KVM_ARM_VCPU_PMU_V3_FILTER	2
-#define   KVM_ARM_VCPU_PMU_V3_SET_PMU	3
+#define   KVM_ARM_VCPU_PMU_V3_IRQ		0
+#define   KVM_ARM_VCPU_PMU_V3_INIT		1
+#define   KVM_ARM_VCPU_PMU_V3_FILTER		2
+#define   KVM_ARM_VCPU_PMU_V3_SET_PMU		3
+#define   KVM_ARM_VCPU_PMU_V3_SET_NR_COUNTERS	4
 #define KVM_ARM_VCPU_TIMER_CTRL		1
 #define   KVM_ARM_VCPU_TIMER_IRQ_VTIMER		0
 #define   KVM_ARM_VCPU_TIMER_IRQ_PTIMER		1
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 45e6d8fca..d00b85cb1 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -375,6 +375,7 @@ struct kvm_run {
 #define KVM_SYSTEM_EVENT_WAKEUP         4
 #define KVM_SYSTEM_EVENT_SUSPEND        5
 #define KVM_SYSTEM_EVENT_SEV_TERM       6
+#define KVM_SYSTEM_EVENT_TDX_FATAL      7
 			__u32 type;
 			__u32 ndata;
 			union {
@@ -929,6 +930,10 @@ struct kvm_enable_cap {
 #define KVM_CAP_PRE_FAULT_MEMORY 236
 #define KVM_CAP_X86_APIC_BUS_CYCLES_NS 237
 #define KVM_CAP_X86_GUEST_MODE 238
+#define KVM_CAP_ARM_WRITABLE_IMP_ID_REGS 239
+#define KVM_CAP_ARM_EL2 240
+#define KVM_CAP_ARM_EL2_E2H0 241
+#define KVM_CAP_RISCV_MP_STATE_RESET 242
 
 struct kvm_irq_routing_irqchip {
 	__u32 irqchip;
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index ac9174717..963540dea 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -327,6 +327,19 @@ struct virtio_net_rss_config {
 	__u8 hash_key_data[/* hash_key_length */];
 };
 
+struct virtio_net_rss_config_hdr {
+	__le32 hash_types;
+	__le16 indirection_table_mask;
+	__le16 unclassified_queue;
+	__le16 indirection_table[/* 1 + indirection_table_mask */];
+};
+
+struct virtio_net_rss_config_trailer {
+	__le16 max_tx_vq;
+	__u8 hash_key_length;
+	__u8 hash_key_data[/* hash_key_length */];
+};
+
  #define VIRTIO_NET_CTRL_MQ_RSS_CONFIG          1
 
 /*
diff --git a/include/linux/virtio_pci.h b/include/linux/virtio_pci.h
index 8549d4571..c691ac210 100644
--- a/include/linux/virtio_pci.h
+++ b/include/linux/virtio_pci.h
@@ -246,6 +246,7 @@ struct virtio_pci_cfg_cap {
 #define VIRTIO_ADMIN_CMD_LIST_USE	0x1
 
 /* Admin command group type. */
+#define VIRTIO_ADMIN_GROUP_TYPE_SELF	0x0
 #define VIRTIO_ADMIN_GROUP_TYPE_SRIOV	0x1
 
 /* Transitional device admin command. */
diff --git a/riscv/include/asm/kvm.h b/riscv/include/asm/kvm.h
index f06bc5efc..5f59fd226 100644
--- a/riscv/include/asm/kvm.h
+++ b/riscv/include/asm/kvm.h
@@ -182,6 +182,8 @@ enum KVM_RISCV_ISA_EXT_ID {
 	KVM_RISCV_ISA_EXT_SVVPTC,
 	KVM_RISCV_ISA_EXT_ZABHA,
 	KVM_RISCV_ISA_EXT_ZICCRSE,
+	KVM_RISCV_ISA_EXT_ZAAMO,
+	KVM_RISCV_ISA_EXT_ZALRSC,
 	KVM_RISCV_ISA_EXT_MAX,
 };
 
diff --git a/x86/include/asm/kvm.h b/x86/include/asm/kvm.h
index 9e75da97b..6f3499507 100644
--- a/x86/include/asm/kvm.h
+++ b/x86/include/asm/kvm.h
@@ -441,6 +441,7 @@ struct kvm_sync_regs {
 #define KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS	(1 << 6)
 #define KVM_X86_QUIRK_SLOT_ZAP_ALL		(1 << 7)
 #define KVM_X86_QUIRK_STUFF_FEATURE_MSRS	(1 << 8)
+#define KVM_X86_QUIRK_IGNORE_GUEST_PAT		(1 << 9)
 
 #define KVM_STATE_NESTED_FORMAT_VMX	0
 #define KVM_STATE_NESTED_FORMAT_SVM	1
@@ -559,6 +560,9 @@ struct kvm_x86_mce {
 #define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE	(1 << 7)
 #define KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA	(1 << 8)
 
+#define KVM_XEN_MSR_MIN_INDEX			0x40000000u
+#define KVM_XEN_MSR_MAX_INDEX			0x4fffffffu
+
 struct kvm_xen_hvm_config {
 	__u32 flags;
 	__u32 msr;
@@ -841,6 +845,7 @@ struct kvm_sev_snp_launch_start {
 };
 
 /* Kept in sync with firmware values for simplicity. */
+#define KVM_SEV_PAGE_TYPE_INVALID		0x0
 #define KVM_SEV_SNP_PAGE_TYPE_NORMAL		0x1
 #define KVM_SEV_SNP_PAGE_TYPE_ZERO		0x3
 #define KVM_SEV_SNP_PAGE_TYPE_UNMEASURED	0x4
@@ -927,4 +932,74 @@ struct kvm_hyperv_eventfd {
 #define KVM_X86_SNP_VM		4
 #define KVM_X86_TDX_VM		5
 
+/* Trust Domain eXtension sub-ioctl() commands. */
+enum kvm_tdx_cmd_id {
+	KVM_TDX_CAPABILITIES = 0,
+	KVM_TDX_INIT_VM,
+	KVM_TDX_INIT_VCPU,
+	KVM_TDX_INIT_MEM_REGION,
+	KVM_TDX_FINALIZE_VM,
+	KVM_TDX_GET_CPUID,
+
+	KVM_TDX_CMD_NR_MAX,
+};
+
+struct kvm_tdx_cmd {
+	/* enum kvm_tdx_cmd_id */
+	__u32 id;
+	/* flags for sub-commend. If sub-command doesn't use this, set zero. */
+	__u32 flags;
+	/*
+	 * data for each sub-command. An immediate or a pointer to the actual
+	 * data in process virtual address.  If sub-command doesn't use it,
+	 * set zero.
+	 */
+	__u64 data;
+	/*
+	 * Auxiliary error code.  The sub-command may return TDX SEAMCALL
+	 * status code in addition to -Exxx.
+	 */
+	__u64 hw_error;
+};
+
+struct kvm_tdx_capabilities {
+	__u64 supported_attrs;
+	__u64 supported_xfam;
+	__u64 reserved[254];
+
+	/* Configurable CPUID bits for userspace */
+	struct kvm_cpuid2 cpuid;
+};
+
+struct kvm_tdx_init_vm {
+	__u64 attributes;
+	__u64 xfam;
+	__u64 mrconfigid[6];	/* sha384 digest */
+	__u64 mrowner[6];	/* sha384 digest */
+	__u64 mrownerconfig[6];	/* sha384 digest */
+
+	/* The total space for TD_PARAMS before the CPUIDs is 256 bytes */
+	__u64 reserved[12];
+
+	/*
+	 * Call KVM_TDX_INIT_VM before vcpu creation, thus before
+	 * KVM_SET_CPUID2.
+	 * This configuration supersedes KVM_SET_CPUID2s for VCPUs because the
+	 * TDX module directly virtualizes those CPUIDs without VMM.  The user
+	 * space VMM, e.g. qemu, should make KVM_SET_CPUID2 consistent with
+	 * those values.  If it doesn't, KVM may have wrong idea of vCPUIDs of
+	 * the guest, and KVM may wrongly emulate CPUIDs or MSRs that the TDX
+	 * module doesn't virtualize.
+	 */
+	struct kvm_cpuid2 cpuid;
+};
+
+#define KVM_TDX_MEASURE_MEMORY_REGION   _BITULL(0)
+
+struct kvm_tdx_init_mem_region {
+	__u64 source_addr;
+	__u64 gpa;
+	__u64 nr_pages;
+};
+
 #endif /* _ASM_X86_KVM_H */
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH kvmtool v2 2/6] arm64: Initial nested virt support
  2025-07-25 14:40 [PATCH kvmtool v2 0/6] arm64: Nested virtualization support Andre Przywara
  2025-07-25 14:40 ` [PATCH kvmtool v2 1/6] Sync kernel UAPI headers with v6.16-rc1 Andre Przywara
@ 2025-07-25 14:40 ` Andre Przywara
  2025-07-25 14:40 ` [PATCH kvmtool v2 3/6] arm64: nested: add support for setting maintenance IRQ Andre Przywara
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 13+ messages in thread
From: Andre Przywara @ 2025-07-25 14:40 UTC (permalink / raw)
  To: Will Deacon, Julien Thierry, Marc Zyngier; +Cc: kvm, kvmarm, Alexandru Elisei

The ARMv8.3 architecture update includes support for nested
virtualization. Allow the user to specify "--nested" to start a guest in
(virtual) EL2 instead of EL1.
This will also change the PSCI conduit from HVC to SMC in the device
tree.

Signed-off-by: Andre Przywara <andre.przywara@arm.com>
---
 arm64/fdt.c                         |  5 ++++-
 arm64/include/kvm/kvm-config-arch.h |  5 ++++-
 arm64/kvm-cpu.c                     | 12 +++++++++++-
 3 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/arm64/fdt.c b/arm64/fdt.c
index df7775876..98f1dd9d4 100644
--- a/arm64/fdt.c
+++ b/arm64/fdt.c
@@ -205,7 +205,10 @@ static int setup_fdt(struct kvm *kvm)
 		_FDT(fdt_property_string(fdt, "compatible", "arm,psci"));
 		fns = &psci_0_1_fns;
 	}
-	_FDT(fdt_property_string(fdt, "method", "hvc"));
+	if (kvm->cfg.arch.nested_virt)
+		_FDT(fdt_property_string(fdt, "method", "smc"));
+	else
+		_FDT(fdt_property_string(fdt, "method", "hvc"));
 	_FDT(fdt_property_cell(fdt, "cpu_suspend", fns->cpu_suspend));
 	_FDT(fdt_property_cell(fdt, "cpu_off", fns->cpu_off));
 	_FDT(fdt_property_cell(fdt, "cpu_on", fns->cpu_on));
diff --git a/arm64/include/kvm/kvm-config-arch.h b/arm64/include/kvm/kvm-config-arch.h
index ee031f010..a1dac28e6 100644
--- a/arm64/include/kvm/kvm-config-arch.h
+++ b/arm64/include/kvm/kvm-config-arch.h
@@ -10,6 +10,7 @@ struct kvm_config_arch {
 	bool		aarch32_guest;
 	bool		has_pmuv3;
 	bool		mte_disabled;
+	bool		nested_virt;
 	u64		kaslr_seed;
 	enum irqchip_type irqchip;
 	u64		fw_addr;
@@ -57,6 +58,8 @@ int sve_vl_parser(const struct option *opt, const char *arg, int unset);
 		     "Type of interrupt controller to emulate in the guest",	\
 		     irqchip_parser, NULL),					\
 	OPT_U64('\0', "firmware-address", &(cfg)->fw_addr,			\
-		"Address where firmware should be loaded"),
+		"Address where firmware should be loaded"),			\
+	OPT_BOOLEAN('\0', "nested", &(cfg)->nested_virt,			\
+		    "Start VCPUs in EL2 (for nested virt)"),
 
 #endif /* ARM_COMMON__KVM_CONFIG_ARCH_H */
diff --git a/arm64/kvm-cpu.c b/arm64/kvm-cpu.c
index 94c08a4d7..42dc11dad 100644
--- a/arm64/kvm-cpu.c
+++ b/arm64/kvm-cpu.c
@@ -71,6 +71,12 @@ static void kvm_cpu__select_features(struct kvm *kvm, struct kvm_vcpu_init *init
 	/* Enable SVE if available */
 	if (kvm__supports_extension(kvm, KVM_CAP_ARM_SVE))
 		init->features[0] |= 1UL << KVM_ARM_VCPU_SVE;
+
+	if (kvm->cfg.arch.nested_virt) {
+		if (!kvm__supports_extension(kvm, KVM_CAP_ARM_EL2))
+			die("EL2 (nested virt) is not supported");
+		init->features[0] |= 1UL << KVM_ARM_VCPU_HAS_EL2;
+	}
 }
 
 static int vcpu_configure_sve(struct kvm_cpu *vcpu)
@@ -313,7 +319,11 @@ static void reset_vcpu_aarch64(struct kvm_cpu *vcpu)
 	reg.addr = (u64)&data;
 
 	/* pstate = all interrupts masked */
-	data	= PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | PSR_MODE_EL1h;
+	data	= PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT;
+	if (vcpu->kvm->cfg.arch.nested_virt)
+		data |= PSR_MODE_EL2h;
+	else
+		data |= PSR_MODE_EL1h;
 	reg.id	= ARM64_CORE_REG(regs.pstate);
 	if (ioctl(vcpu->vcpu_fd, KVM_SET_ONE_REG, &reg) < 0)
 		die_perror("KVM_SET_ONE_REG failed (spsr[EL1])");
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH kvmtool v2 3/6] arm64: nested: add support for setting maintenance IRQ
  2025-07-25 14:40 [PATCH kvmtool v2 0/6] arm64: Nested virtualization support Andre Przywara
  2025-07-25 14:40 ` [PATCH kvmtool v2 1/6] Sync kernel UAPI headers with v6.16-rc1 Andre Przywara
  2025-07-25 14:40 ` [PATCH kvmtool v2 2/6] arm64: Initial nested virt support Andre Przywara
@ 2025-07-25 14:40 ` Andre Przywara
  2025-07-25 14:40 ` [PATCH kvmtool v2 4/6] arm64: Add counter offset control Andre Przywara
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 13+ messages in thread
From: Andre Przywara @ 2025-07-25 14:40 UTC (permalink / raw)
  To: Will Deacon, Julien Thierry, Marc Zyngier; +Cc: kvm, kvmarm, Alexandru Elisei

Uses the new VGIC KVM device attribute to set the maintenance IRQ.
This is fixed to use PPI 9, as a platform decision made by kvmtool,
matching the SBSA recommendation.

Signed-off-by: Andre Przywara <andre.przywara@arm.com>
---
 arm64/arm-cpu.c         |  3 ++-
 arm64/gic.c             | 21 ++++++++++++++++++++-
 arm64/include/kvm/gic.h |  2 +-
 3 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/arm64/arm-cpu.c b/arm64/arm-cpu.c
index 69bb2cb2c..1e456f2c6 100644
--- a/arm64/arm-cpu.c
+++ b/arm64/arm-cpu.c
@@ -14,7 +14,8 @@ static void generate_fdt_nodes(void *fdt, struct kvm *kvm)
 {
 	int timer_interrupts[4] = {13, 14, 11, 10};
 
-	gic__generate_fdt_nodes(fdt, kvm->cfg.arch.irqchip);
+	gic__generate_fdt_nodes(fdt, kvm->cfg.arch.irqchip,
+				kvm->cfg.arch.nested_virt);
 	timer__generate_fdt_nodes(fdt, kvm, timer_interrupts);
 	pmu__generate_fdt_nodes(fdt, kvm);
 }
diff --git a/arm64/gic.c b/arm64/gic.c
index b0d3a1abb..7461b0f3f 100644
--- a/arm64/gic.c
+++ b/arm64/gic.c
@@ -11,6 +11,8 @@
 
 #define IRQCHIP_GIC 0
 
+#define GIC_MAINT_IRQ	9
+
 static int gic_fd = -1;
 static u64 gic_redists_base;
 static u64 gic_redists_size;
@@ -302,10 +304,15 @@ static int gic__init_gic(struct kvm *kvm)
 
 	int lines = irq__get_nr_allocated_lines();
 	u32 nr_irqs = ALIGN(lines, 32) + GIC_SPI_IRQ_BASE;
+	u32 maint_irq = GIC_MAINT_IRQ + 16;			/* PPI */
 	struct kvm_device_attr nr_irqs_attr = {
 		.group	= KVM_DEV_ARM_VGIC_GRP_NR_IRQS,
 		.addr	= (u64)(unsigned long)&nr_irqs,
 	};
+	struct kvm_device_attr maint_irq_attr = {
+		.group	= KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ,
+		.addr	= (u64)(unsigned long)&maint_irq,
+	};
 	struct kvm_device_attr vgic_init_attr = {
 		.group	= KVM_DEV_ARM_VGIC_GRP_CTRL,
 		.attr	= KVM_DEV_ARM_VGIC_CTRL_INIT,
@@ -325,6 +332,13 @@ static int gic__init_gic(struct kvm *kvm)
 			return ret;
 	}
 
+	if (kvm->cfg.arch.nested_virt &&
+	    !ioctl(gic_fd, KVM_HAS_DEVICE_ATTR, &maint_irq_attr)) {
+		ret = ioctl(gic_fd, KVM_SET_DEVICE_ATTR, &maint_irq_attr);
+		if (ret)
+			return ret;
+	}
+
 	irq__routing_init(kvm);
 
 	if (!ioctl(gic_fd, KVM_HAS_DEVICE_ATTR, &vgic_init_attr)) {
@@ -342,7 +356,7 @@ static int gic__init_gic(struct kvm *kvm)
 }
 late_init(gic__init_gic)
 
-void gic__generate_fdt_nodes(void *fdt, enum irqchip_type type)
+void gic__generate_fdt_nodes(void *fdt, enum irqchip_type type, bool nested)
 {
 	const char *compatible, *msi_compatible = NULL;
 	u64 msi_prop[2];
@@ -350,6 +364,8 @@ void gic__generate_fdt_nodes(void *fdt, enum irqchip_type type)
 		cpu_to_fdt64(ARM_GIC_DIST_BASE), cpu_to_fdt64(ARM_GIC_DIST_SIZE),
 		0, 0,				/* to be filled */
 	};
+	u32 maint_irq[3] = {cpu_to_fdt32(1), cpu_to_fdt32(GIC_MAINT_IRQ),
+			    cpu_to_fdt32(0xff04)};
 
 	switch (type) {
 	case IRQCHIP_GICV2M:
@@ -377,6 +393,9 @@ void gic__generate_fdt_nodes(void *fdt, enum irqchip_type type)
 	_FDT(fdt_property_cell(fdt, "#interrupt-cells", GIC_FDT_IRQ_NUM_CELLS));
 	_FDT(fdt_property(fdt, "interrupt-controller", NULL, 0));
 	_FDT(fdt_property(fdt, "reg", reg_prop, sizeof(reg_prop)));
+	if (nested)
+		_FDT(fdt_property(fdt, "interrupts", maint_irq,
+				  sizeof(maint_irq)));
 	_FDT(fdt_property_cell(fdt, "phandle", PHANDLE_GIC));
 	_FDT(fdt_property_cell(fdt, "#address-cells", 2));
 	_FDT(fdt_property_cell(fdt, "#size-cells", 2));
diff --git a/arm64/include/kvm/gic.h b/arm64/include/kvm/gic.h
index ad8bcbf21..1541a5824 100644
--- a/arm64/include/kvm/gic.h
+++ b/arm64/include/kvm/gic.h
@@ -36,7 +36,7 @@ struct kvm;
 int gic__alloc_irqnum(void);
 int gic__create(struct kvm *kvm, enum irqchip_type type);
 int gic__create_gicv2m_frame(struct kvm *kvm, u64 msi_frame_addr);
-void gic__generate_fdt_nodes(void *fdt, enum irqchip_type type);
+void gic__generate_fdt_nodes(void *fdt, enum irqchip_type type, bool nested);
 u32 gic__get_fdt_irq_cpumask(struct kvm *kvm);
 
 int gic__add_irqfd(struct kvm *kvm, unsigned int gsi, int trigger_fd,
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH kvmtool v2 4/6] arm64: Add counter offset control
  2025-07-25 14:40 [PATCH kvmtool v2 0/6] arm64: Nested virtualization support Andre Przywara
                   ` (2 preceding siblings ...)
  2025-07-25 14:40 ` [PATCH kvmtool v2 3/6] arm64: nested: add support for setting maintenance IRQ Andre Przywara
@ 2025-07-25 14:40 ` Andre Przywara
  2025-07-26  9:25   ` Marc Zyngier
  2025-07-25 14:40 ` [PATCH kvmtool v2 5/6] arm64: add FEAT_E2H0 support (TBC) Andre Przywara
  2025-07-25 14:41 ` [PATCH kvmtool v2 6/6] arm64: Generate HYP timer interrupt specifiers Andre Przywara
  5 siblings, 1 reply; 13+ messages in thread
From: Andre Przywara @ 2025-07-25 14:40 UTC (permalink / raw)
  To: Will Deacon, Julien Thierry, Marc Zyngier; +Cc: kvm, kvmarm, Alexandru Elisei

From: Marc Zyngier <maz@kernel.org>

KVM now allows offsetting the arch timer counter values reported to a
guest, via a per-VM ioctl. This is conceptually similar to the effect
of CNTVOFF_EL2, but applies to both the emulated physical and virtual
counters, and also to all VCPUs.

Add a command line option to allow setting this value when creating a VM.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Andre Przywara <andre.przywara@arm.com>
---
 arm64/include/kvm/kvm-config-arch.h |  3 +++
 arm64/kvm.c                         | 17 +++++++++++++++++
 2 files changed, 20 insertions(+)

diff --git a/arm64/include/kvm/kvm-config-arch.h b/arm64/include/kvm/kvm-config-arch.h
index a1dac28e6..44c43367b 100644
--- a/arm64/include/kvm/kvm-config-arch.h
+++ b/arm64/include/kvm/kvm-config-arch.h
@@ -14,6 +14,7 @@ struct kvm_config_arch {
 	u64		kaslr_seed;
 	enum irqchip_type irqchip;
 	u64		fw_addr;
+	u64		counter_offset;
 	unsigned int	sve_max_vq;
 	bool		no_pvtime;
 };
@@ -59,6 +60,8 @@ int sve_vl_parser(const struct option *opt, const char *arg, int unset);
 		     irqchip_parser, NULL),					\
 	OPT_U64('\0', "firmware-address", &(cfg)->fw_addr,			\
 		"Address where firmware should be loaded"),			\
+	OPT_U64('\0', "counter-offset", &(cfg)->counter_offset,			\
+		"Specify the counter offset, defaulting to 0"),			\
 	OPT_BOOLEAN('\0', "nested", &(cfg)->nested_virt,			\
 		    "Start VCPUs in EL2 (for nested virt)"),
 
diff --git a/arm64/kvm.c b/arm64/kvm.c
index 23b4dab1f..6e971dd78 100644
--- a/arm64/kvm.c
+++ b/arm64/kvm.c
@@ -119,6 +119,22 @@ static void kvm__arch_enable_mte(struct kvm *kvm)
 	pr_debug("MTE capability enabled");
 }
 
+static void kvm__arch_set_counter_offset(struct kvm *kvm)
+{
+	struct kvm_arm_counter_offset offset = {
+		.counter_offset = kvm->cfg.arch.counter_offset,
+	};
+
+	if (!kvm->cfg.arch.counter_offset)
+		return;
+
+	if (!kvm__supports_extension(kvm, KVM_CAP_COUNTER_OFFSET))
+		die("No support for global counter offset");
+
+	if (ioctl(kvm->vm_fd, KVM_ARM_SET_COUNTER_OFFSET, &offset))
+		die_perror("KVM_ARM_SET_COUNTER_OFFSET");
+}
+
 void kvm__arch_init(struct kvm *kvm)
 {
 	/* Create the virtual GIC. */
@@ -126,6 +142,7 @@ void kvm__arch_init(struct kvm *kvm)
 		die("Failed to create virtual GIC");
 
 	kvm__arch_enable_mte(kvm);
+	kvm__arch_set_counter_offset(kvm);
 }
 
 static u64 kvm__arch_get_payload_region_size(struct kvm *kvm)
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH kvmtool v2 5/6] arm64: add FEAT_E2H0 support (TBC)
  2025-07-25 14:40 [PATCH kvmtool v2 0/6] arm64: Nested virtualization support Andre Przywara
                   ` (3 preceding siblings ...)
  2025-07-25 14:40 ` [PATCH kvmtool v2 4/6] arm64: Add counter offset control Andre Przywara
@ 2025-07-25 14:40 ` Andre Przywara
  2025-07-25 16:37   ` Marc Zyngier
  2025-07-25 14:41 ` [PATCH kvmtool v2 6/6] arm64: Generate HYP timer interrupt specifiers Andre Przywara
  5 siblings, 1 reply; 13+ messages in thread
From: Andre Przywara @ 2025-07-25 14:40 UTC (permalink / raw)
  To: Will Deacon, Julien Thierry, Marc Zyngier; +Cc: kvm, kvmarm, Alexandru Elisei

From: Marc Zyngier <maz@kernel.org>

To reduce code complexity, KVM only supports nested virtualisation in
VHE mode. So to allow recursive nested virtualisation, and be able to
expose FEAT_NV2 to a guest, we must prevent a guest from turning off
HCR_EL2.E2H, which is covered by not advertising the FEAT_E2H0 architecture
feature.

To allow people to run a guest in non-VHE mode, KVM introduced the
KVM_ARM_VCPU_HAS_EL2_E2H0 feature flag, which will allow control over
HCR_EL2.E2H, but at the cost of turning off FEAT_NV2.

Add a kvmtool command line option "--e2h0" to set that feature bit when
creating a guest, to gain non-VHE, but lose recursive nested virt.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Andre Przywara <andre.przywara@arm.com>
---
 arm64/include/kvm/kvm-config-arch.h | 5 ++++-
 arm64/kvm-cpu.c                     | 2 ++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/arm64/include/kvm/kvm-config-arch.h b/arm64/include/kvm/kvm-config-arch.h
index 44c43367b..73bf4211a 100644
--- a/arm64/include/kvm/kvm-config-arch.h
+++ b/arm64/include/kvm/kvm-config-arch.h
@@ -11,6 +11,7 @@ struct kvm_config_arch {
 	bool		has_pmuv3;
 	bool		mte_disabled;
 	bool		nested_virt;
+	bool		e2h0;
 	u64		kaslr_seed;
 	enum irqchip_type irqchip;
 	u64		fw_addr;
@@ -63,6 +64,8 @@ int sve_vl_parser(const struct option *opt, const char *arg, int unset);
 	OPT_U64('\0', "counter-offset", &(cfg)->counter_offset,			\
 		"Specify the counter offset, defaulting to 0"),			\
 	OPT_BOOLEAN('\0', "nested", &(cfg)->nested_virt,			\
-		    "Start VCPUs in EL2 (for nested virt)"),
+		    "Start VCPUs in EL2 (for nested virt)"),			\
+	OPT_BOOLEAN('\0', "e2h0", &(cfg)->e2h0,					\
+		    "Create guest without VHE support"),
 
 #endif /* ARM_COMMON__KVM_CONFIG_ARCH_H */
diff --git a/arm64/kvm-cpu.c b/arm64/kvm-cpu.c
index 42dc11dad..6eb76dff4 100644
--- a/arm64/kvm-cpu.c
+++ b/arm64/kvm-cpu.c
@@ -76,6 +76,8 @@ static void kvm_cpu__select_features(struct kvm *kvm, struct kvm_vcpu_init *init
 		if (!kvm__supports_extension(kvm, KVM_CAP_ARM_EL2))
 			die("EL2 (nested virt) is not supported");
 		init->features[0] |= 1UL << KVM_ARM_VCPU_HAS_EL2;
+		if (kvm->cfg.arch.e2h0)
+			init->features[0] |= 1UL << KVM_ARM_VCPU_HAS_EL2_E2H0;
 	}
 }
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH kvmtool v2 6/6] arm64: Generate HYP timer interrupt specifiers
  2025-07-25 14:40 [PATCH kvmtool v2 0/6] arm64: Nested virtualization support Andre Przywara
                   ` (4 preceding siblings ...)
  2025-07-25 14:40 ` [PATCH kvmtool v2 5/6] arm64: add FEAT_E2H0 support (TBC) Andre Przywara
@ 2025-07-25 14:41 ` Andre Przywara
  5 siblings, 0 replies; 13+ messages in thread
From: Andre Przywara @ 2025-07-25 14:41 UTC (permalink / raw)
  To: Will Deacon, Julien Thierry, Marc Zyngier; +Cc: kvm, kvmarm, Alexandru Elisei

From: Marc Zyngier <maz@kernel.org>

FEAT_VHE introduced a non-secure EL2 virtual timer, along with its
interrupt line. Consequently the arch timer DT binding introduced a fifth
interrupt to communicate this interrupt number.

Refactor the interrupts property generation code to deal with a variable
number of interrupts, and forward five interrupts instead of four in case
nested virt is enabled.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Andre Przywara <andre.przywara@arm.com>
---
 arm64/arm-cpu.c           |  4 +---
 arm64/include/kvm/timer.h |  2 +-
 arm64/timer.c             | 29 ++++++++++++-----------------
 3 files changed, 14 insertions(+), 21 deletions(-)

diff --git a/arm64/arm-cpu.c b/arm64/arm-cpu.c
index 1e456f2c6..abdd6324f 100644
--- a/arm64/arm-cpu.c
+++ b/arm64/arm-cpu.c
@@ -12,11 +12,9 @@
 
 static void generate_fdt_nodes(void *fdt, struct kvm *kvm)
 {
-	int timer_interrupts[4] = {13, 14, 11, 10};
-
 	gic__generate_fdt_nodes(fdt, kvm->cfg.arch.irqchip,
 				kvm->cfg.arch.nested_virt);
-	timer__generate_fdt_nodes(fdt, kvm, timer_interrupts);
+	timer__generate_fdt_nodes(fdt, kvm);
 	pmu__generate_fdt_nodes(fdt, kvm);
 }
 
diff --git a/arm64/include/kvm/timer.h b/arm64/include/kvm/timer.h
index 928e9ea7a..81e093e46 100644
--- a/arm64/include/kvm/timer.h
+++ b/arm64/include/kvm/timer.h
@@ -1,6 +1,6 @@
 #ifndef ARM_COMMON__TIMER_H
 #define ARM_COMMON__TIMER_H
 
-void timer__generate_fdt_nodes(void *fdt, struct kvm *kvm, int *irqs);
+void timer__generate_fdt_nodes(void *fdt, struct kvm *kvm);
 
 #endif /* ARM_COMMON__TIMER_H */
diff --git a/arm64/timer.c b/arm64/timer.c
index 861f2d994..2ac6144f9 100644
--- a/arm64/timer.c
+++ b/arm64/timer.c
@@ -5,31 +5,26 @@
 #include "kvm/timer.h"
 #include "kvm/util.h"
 
-void timer__generate_fdt_nodes(void *fdt, struct kvm *kvm, int *irqs)
+void timer__generate_fdt_nodes(void *fdt, struct kvm *kvm)
 {
 	const char compatible[] = "arm,armv8-timer\0arm,armv7-timer";
 	u32 cpu_mask = gic__get_fdt_irq_cpumask(kvm);
-	u32 irq_prop[] = {
-		cpu_to_fdt32(GIC_FDT_IRQ_TYPE_PPI),
-		cpu_to_fdt32(irqs[0]),
-		cpu_to_fdt32(cpu_mask | IRQ_TYPE_LEVEL_LOW),
+	int irqs[5] = {13, 14, 11, 10, 12};
+	int nr = ARRAY_SIZE(irqs);
+	u32 irq_prop[nr * 3];
 
-		cpu_to_fdt32(GIC_FDT_IRQ_TYPE_PPI),
-		cpu_to_fdt32(irqs[1]),
-		cpu_to_fdt32(cpu_mask | IRQ_TYPE_LEVEL_LOW),
+	if (!kvm->cfg.arch.nested_virt)
+		nr--;
 
-		cpu_to_fdt32(GIC_FDT_IRQ_TYPE_PPI),
-		cpu_to_fdt32(irqs[2]),
-		cpu_to_fdt32(cpu_mask | IRQ_TYPE_LEVEL_LOW),
-
-		cpu_to_fdt32(GIC_FDT_IRQ_TYPE_PPI),
-		cpu_to_fdt32(irqs[3]),
-		cpu_to_fdt32(cpu_mask | IRQ_TYPE_LEVEL_LOW),
-	};
+	for (int i = 0; i < nr; i++) {
+		irq_prop[i * 3 + 0] = cpu_to_fdt32(GIC_FDT_IRQ_TYPE_PPI);
+		irq_prop[i * 3 + 1] = cpu_to_fdt32(irqs[i]);
+		irq_prop[i * 3 + 2] = cpu_to_fdt32(cpu_mask | IRQ_TYPE_LEVEL_LOW);
+	}
 
 	_FDT(fdt_begin_node(fdt, "timer"));
 	_FDT(fdt_property(fdt, "compatible", compatible, sizeof(compatible)));
-	_FDT(fdt_property(fdt, "interrupts", irq_prop, sizeof(irq_prop)));
+	_FDT(fdt_property(fdt, "interrupts", irq_prop, nr * 3 * sizeof(irq_prop[0])));
 	_FDT(fdt_property(fdt, "always-on", NULL, 0));
 	if (kvm->cfg.arch.force_cntfrq > 0)
 		_FDT(fdt_property_cell(fdt, "clock-frequency", kvm->cfg.arch.force_cntfrq));
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH kvmtool v2 5/6] arm64: add FEAT_E2H0 support (TBC)
  2025-07-25 14:40 ` [PATCH kvmtool v2 5/6] arm64: add FEAT_E2H0 support (TBC) Andre Przywara
@ 2025-07-25 16:37   ` Marc Zyngier
  2025-07-26  9:01     ` Wei-Lin Chang
  0 siblings, 1 reply; 13+ messages in thread
From: Marc Zyngier @ 2025-07-25 16:37 UTC (permalink / raw)
  To: Andre Przywara; +Cc: Will Deacon, Julien Thierry, kvm, kvmarm, Alexandru Elisei

Hi Andre,

Thanks for picking this. A few nits below.

On Fri, 25 Jul 2025 15:40:59 +0100,
Andre Przywara <andre.przywara@arm.com> wrote:
> 
> From: Marc Zyngier <maz@kernel.org>
> 
> To reduce code complexity, KVM only supports nested virtualisation in
> VHE mode. So to allow recursive nested virtualisation, and be able to
> expose FEAT_NV2 to a guest, we must prevent a guest from turning off
> HCR_EL2.E2H, which is covered by not advertising the FEAT_E2H0 architecture
> feature.
> 
> To allow people to run a guest in non-VHE mode, KVM introduced the
> KVM_ARM_VCPU_HAS_EL2_E2H0 feature flag, which will allow control over
> HCR_EL2.E2H, but at the cost of turning off FEAT_NV2.

All of that has been captured at length in the kernel code, and I
think this is "too much information" for userspace. I'd rather we
stick to a pure description of what the various options mean to the
user.

> Add a kvmtool command line option "--e2h0" to set that feature bit when
> creating a guest, to gain non-VHE, but lose recursive nested virt.

How about:

"The --nested option allows a guest to boot at EL2 without FEAT_E2H0
 (i.e. mandating VHE support). While this is great for "modern"
 operating systems and hypervisors, a few legacy guests are stuck in a
 distant past.

 To support those, the --e2h0 option exposes FEAT_E2H0 to the guest,
 at the expense of a number of other features, such as FEAT_NV2. This
 is conditioned on the host itself supporting FEAT_E2H0."

> 
> Signed-off-by: Marc Zyngier <maz@kernel.org>
> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
> ---
>  arm64/include/kvm/kvm-config-arch.h | 5 ++++-
>  arm64/kvm-cpu.c                     | 2 ++
>  2 files changed, 6 insertions(+), 1 deletion(-)
> 
> diff --git a/arm64/include/kvm/kvm-config-arch.h b/arm64/include/kvm/kvm-config-arch.h
> index 44c43367b..73bf4211a 100644
> --- a/arm64/include/kvm/kvm-config-arch.h
> +++ b/arm64/include/kvm/kvm-config-arch.h
> @@ -11,6 +11,7 @@ struct kvm_config_arch {
>  	bool		has_pmuv3;
>  	bool		mte_disabled;
>  	bool		nested_virt;
> +	bool		e2h0;
>  	u64		kaslr_seed;
>  	enum irqchip_type irqchip;
>  	u64		fw_addr;
> @@ -63,6 +64,8 @@ int sve_vl_parser(const struct option *opt, const char *arg, int unset);
>  	OPT_U64('\0', "counter-offset", &(cfg)->counter_offset,			\
>  		"Specify the counter offset, defaulting to 0"),			\
>  	OPT_BOOLEAN('\0', "nested", &(cfg)->nested_virt,			\
> -		    "Start VCPUs in EL2 (for nested virt)"),
> +		    "Start VCPUs in EL2 (for nested virt)"),			\
> +	OPT_BOOLEAN('\0', "e2h0", &(cfg)->e2h0,					\
> +		    "Create guest without VHE support"),
>  
>  #endif /* ARM_COMMON__KVM_CONFIG_ARCH_H */
> diff --git a/arm64/kvm-cpu.c b/arm64/kvm-cpu.c
> index 42dc11dad..6eb76dff4 100644
> --- a/arm64/kvm-cpu.c
> +++ b/arm64/kvm-cpu.c
> @@ -76,6 +76,8 @@ static void kvm_cpu__select_features(struct kvm *kvm, struct kvm_vcpu_init *init
>  		if (!kvm__supports_extension(kvm, KVM_CAP_ARM_EL2))
>  			die("EL2 (nested virt) is not supported");
>  		init->features[0] |= 1UL << KVM_ARM_VCPU_HAS_EL2;
> +		if (kvm->cfg.arch.e2h0)
> +			init->features[0] |= 1UL << KVM_ARM_VCPU_HAS_EL2_E2H0;

This really should also check the capability in order to fail
gracefully on systems that have no E2H0 support at all (or have it so
buggy that it is permanently disabled by the kernel):

+		if (kvm->cfg.arch.e2h0) {
+	  		if (!kvm__supports_extension(kvm, KVM_CAP_ARM_EL2_E2H0))
+				die("FEAT_E2H0 is not supported");
+			init->features[0] |= 1UL << KVM_ARM_VCPU_HAS_EL2_E2H0;
+		}

Thanks,

	M.

-- 
Without deviation from the norm, progress is not possible.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH kvmtool v2 5/6] arm64: add FEAT_E2H0 support (TBC)
  2025-07-25 16:37   ` Marc Zyngier
@ 2025-07-26  9:01     ` Wei-Lin Chang
  2025-07-26  9:19       ` Marc Zyngier
  0 siblings, 1 reply; 13+ messages in thread
From: Wei-Lin Chang @ 2025-07-26  9:01 UTC (permalink / raw)
  To: Marc Zyngier, Andre Przywara
  Cc: Will Deacon, Julien Thierry, kvm, kvmarm, Alexandru Elisei

Hi all,

On Fri, Jul 25, 2025 at 05:37:12PM +0100, Marc Zyngier wrote:
> Hi Andre,
> 
> Thanks for picking this. A few nits below.
> 
> On Fri, 25 Jul 2025 15:40:59 +0100,
> Andre Przywara <andre.przywara@arm.com> wrote:
> > 
> > From: Marc Zyngier <maz@kernel.org>
> > 
> > To reduce code complexity, KVM only supports nested virtualisation in
> > VHE mode. So to allow recursive nested virtualisation, and be able to
> > expose FEAT_NV2 to a guest, we must prevent a guest from turning off
> > HCR_EL2.E2H, which is covered by not advertising the FEAT_E2H0 architecture
> > feature.
> > 
> > To allow people to run a guest in non-VHE mode, KVM introduced the
> > KVM_ARM_VCPU_HAS_EL2_E2H0 feature flag, which will allow control over
> > HCR_EL2.E2H, but at the cost of turning off FEAT_NV2.
> 
> All of that has been captured at length in the kernel code, and I
> think this is "too much information" for userspace. I'd rather we
> stick to a pure description of what the various options mean to the
> user.
> 
> > Add a kvmtool command line option "--e2h0" to set that feature bit when
> > creating a guest, to gain non-VHE, but lose recursive nested virt.
> 
> How about:
> 
> "The --nested option allows a guest to boot at EL2 without FEAT_E2H0
>  (i.e. mandating VHE support). While this is great for "modern"
>  operating systems and hypervisors, a few legacy guests are stuck in a
>  distant past.
> 
>  To support those, the --e2h0 option exposes FEAT_E2H0 to the guest,
>  at the expense of a number of other features, such as FEAT_NV2. This

Just a very small thing:

Will only mentioning FEAT_NV2 here lead people to think that FEAT_NV is
still available with --e2h0?
Maybe s/FEAT_NV2/FEAT_NV/ makes it clearer?

Thanks,
Wei-Lin Chang

>  is conditioned on the host itself supporting FEAT_E2H0."
> 
> > 
> > Signed-off-by: Marc Zyngier <maz@kernel.org>
> > Signed-off-by: Andre Przywara <andre.przywara@arm.com>
> > ---
> >  arm64/include/kvm/kvm-config-arch.h | 5 ++++-
> >  arm64/kvm-cpu.c                     | 2 ++
> >  2 files changed, 6 insertions(+), 1 deletion(-)
> > 
> > diff --git a/arm64/include/kvm/kvm-config-arch.h b/arm64/include/kvm/kvm-config-arch.h
> > index 44c43367b..73bf4211a 100644
> > --- a/arm64/include/kvm/kvm-config-arch.h
> > +++ b/arm64/include/kvm/kvm-config-arch.h
> > @@ -11,6 +11,7 @@ struct kvm_config_arch {
> >  	bool		has_pmuv3;
> >  	bool		mte_disabled;
> >  	bool		nested_virt;
> > +	bool		e2h0;
> >  	u64		kaslr_seed;
> >  	enum irqchip_type irqchip;
> >  	u64		fw_addr;
> > @@ -63,6 +64,8 @@ int sve_vl_parser(const struct option *opt, const char *arg, int unset);
> >  	OPT_U64('\0', "counter-offset", &(cfg)->counter_offset,			\
> >  		"Specify the counter offset, defaulting to 0"),			\
> >  	OPT_BOOLEAN('\0', "nested", &(cfg)->nested_virt,			\
> > -		    "Start VCPUs in EL2 (for nested virt)"),
> > +		    "Start VCPUs in EL2 (for nested virt)"),			\
> > +	OPT_BOOLEAN('\0', "e2h0", &(cfg)->e2h0,					\
> > +		    "Create guest without VHE support"),
> >  
> >  #endif /* ARM_COMMON__KVM_CONFIG_ARCH_H */
> > diff --git a/arm64/kvm-cpu.c b/arm64/kvm-cpu.c
> > index 42dc11dad..6eb76dff4 100644
> > --- a/arm64/kvm-cpu.c
> > +++ b/arm64/kvm-cpu.c
> > @@ -76,6 +76,8 @@ static void kvm_cpu__select_features(struct kvm *kvm, struct kvm_vcpu_init *init
> >  		if (!kvm__supports_extension(kvm, KVM_CAP_ARM_EL2))
> >  			die("EL2 (nested virt) is not supported");
> >  		init->features[0] |= 1UL << KVM_ARM_VCPU_HAS_EL2;
> > +		if (kvm->cfg.arch.e2h0)
> > +			init->features[0] |= 1UL << KVM_ARM_VCPU_HAS_EL2_E2H0;
> 
> This really should also check the capability in order to fail
> gracefully on system that have no E2H0 support at all (or have it so
> buggy that it is permanently disabled by the kernel):
> 
> +		if (kvm->cfg.arch.e2h0) {
> +	  		if (!kvm__supports_extension(kvm, KVM_CAP_ARM_EL2_E2H0))
> +				die("FEAT_E2H0 is not supported");
> +			init->features[0] |= 1UL << KVM_ARM_VCPU_HAS_EL2_E2H0;
> +		}
> 
> Thanks,
> 
> 	M.
> 
> -- 
> Without deviation from the norm, progress is not possible.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH kvmtool v2 5/6] arm64: add FEAT_E2H0 support (TBC)
  2025-07-26  9:01     ` Wei-Lin Chang
@ 2025-07-26  9:19       ` Marc Zyngier
  2025-07-26 10:11         ` Wei-Lin Chang
  0 siblings, 1 reply; 13+ messages in thread
From: Marc Zyngier @ 2025-07-26  9:19 UTC (permalink / raw)
  To: Wei-Lin Chang
  Cc: Andre Przywara, Will Deacon, Julien Thierry, kvm, kvmarm,
	Alexandru Elisei

On Sat, 26 Jul 2025 10:01:25 +0100,
Wei-Lin Chang <r09922117@csie.ntu.edu.tw> wrote:
> 
> Hi all,
> 
> On Fri, Jul 25, 2025 at 05:37:12PM +0100, Marc Zyngier wrote:
> > Hi Andre,
> > 
> > Thanks for picking this. A few nits below.
> > 
> > On Fri, 25 Jul 2025 15:40:59 +0100,
> > Andre Przywara <andre.przywara@arm.com> wrote:
> > > 
> > > From: Marc Zyngier <maz@kernel.org>
> > > 
> > > To reduce code complexity, KVM only supports nested virtualisation in
> > > VHE mode. So to allow recursive nested virtualisation, and be able to
> > > expose FEAT_NV2 to a guest, we must prevent a guest from turning off
> > > HCR_EL2.E2H, which is covered by not advertising the FEAT_E2H0 architecture
> > > feature.
> > > 
> > > To allow people to run a guest in non-VHE mode, KVM introduced the
> > > KVM_ARM_VCPU_HAS_EL2_E2H0 feature flag, which will allow control over
> > > HCR_EL2.E2H, but at the cost of turning off FEAT_NV2.
> > 
> > All of that has been captured at length in the kernel code, and I
> > think this is "too much information" for userspace. I'd rather we
> > stick to a pure description of what the various options mean to the
> > user.
> > 
> > > Add a kvmtool command line option "--e2h0" to set that feature bit when
> > > creating a guest, to gain non-VHE, but lose recursive nested virt.
> > 
> > How about:
> > 
> > "The --nested option allows a guest to boot at EL2 without FEAT_E2H0
> >  (i.e. mandating VHE support). While this is great for "modern"
> >  operating systems and hypervisors, a few legacy guests are stuck in a
> >  distant past.
> > 
> >  To support those, the --e2h0 option exposes FEAT_E2H0 to the guest,
> >  at the expense of a number of other features, such as FEAT_NV2. This
> 
> Just a very small thing:
> 
> Will only mentioning FEAT_NV2 here lead people to think that FEAT_NV is
> still available with --e2h0?
> Maybe s/FEAT_NV2/FEAT_NV/ makes it clearer?

Maybe. On the other hand, we never advertise the old FEAT_NV as such,
irrespective of the state of E2H. This is indicated by
ID_AA64MMFR4_EL1.NV_frac==0b0001 when NV is advertised. So I'm not
sure this changes anything, really.

Thanks,

	M.


-- 
Jazz isn't dead. It just smells funny.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH kvmtool v2 4/6] arm64: Add counter offset control
  2025-07-25 14:40 ` [PATCH kvmtool v2 4/6] arm64: Add counter offset control Andre Przywara
@ 2025-07-26  9:25   ` Marc Zyngier
  0 siblings, 0 replies; 13+ messages in thread
From: Marc Zyngier @ 2025-07-26  9:25 UTC (permalink / raw)
  To: Andre Przywara; +Cc: Will Deacon, Julien Thierry, kvm, kvmarm, Alexandru Elisei

On Fri, 25 Jul 2025 15:40:58 +0100,
Andre Przywara <andre.przywara@arm.com> wrote:
> 
> From: Marc Zyngier <maz@kernel.org>
> 
> KVM now allows to offset the arch timer counter values reported to a
> guest, via a per-VM ioctl. This is conceptually similar to the effects
> of CNTVOFF_EL2, but applies to both the emulated physical and virtual
> counter, and also to all VCPUs.

Not really. Architecturally, there is only a single, global counter,
and multiple *views* of that counter.

What this option does is to offset the global counter. Then, the
various per-CPU offsets are applied on the views that can be affected
by them. This is cumulative. So for example, CNTVCT_EL0 is affected by
both the global offset and CNTVOFF_EL2 when read from EL0, EL1, and
EL2 when E2H==0. Behind the scenes, this is of course implemented with
CNTVOFF_EL2 (when it works), but that's not really relevant here.

I'd suggest something along the lines of:

"KVM allows the offsetting of the global counter in order to help with
 migration of a VM. This offset applies cumulatively with the offsets
 provided by the architecture.

 Although kvmtool doesn't provide a way to migrate a VM, controlling
 this offset is useful to test the timer subsystem."

Thanks,

	M.

-- 
Jazz isn't dead. It just smells funny.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH kvmtool v2 5/6] arm64: add FEAT_E2H0 support (TBC)
  2025-07-26  9:19       ` Marc Zyngier
@ 2025-07-26 10:11         ` Wei-Lin Chang
  2025-07-26 10:34           ` Marc Zyngier
  0 siblings, 1 reply; 13+ messages in thread
From: Wei-Lin Chang @ 2025-07-26 10:11 UTC (permalink / raw)
  To: Marc Zyngier
  Cc: Andre Przywara, Will Deacon, Julien Thierry, kvm, kvmarm,
	Alexandru Elisei

On Sat, Jul 26, 2025 at 10:19:11AM +0100, Marc Zyngier wrote:
> On Sat, 26 Jul 2025 10:01:25 +0100,
> Wei-Lin Chang <r09922117@csie.ntu.edu.tw> wrote:
> > 
> > Hi all,
> > 
> > On Fri, Jul 25, 2025 at 05:37:12PM +0100, Marc Zyngier wrote:
> > > Hi Andre,
> > > 
> > > Thanks for picking this. A few nits below.
> > > 
> > > On Fri, 25 Jul 2025 15:40:59 +0100,
> > > Andre Przywara <andre.przywara@arm.com> wrote:
> > > > 
> > > > From: Marc Zyngier <maz@kernel.org>
> > > > 
> > > > To reduce code complexity, KVM only supports nested virtualisation in
> > > > VHE mode. So to allow recursive nested virtualisation, and be able to
> > > > expose FEAT_NV2 to a guest, we must prevent a guest from turning off
> > > > HCR_EL2.E2H, which is covered by not advertising the FEAT_E2H0 architecture
> > > > feature.
> > > > 
> > > > To allow people to run a guest in non-VHE mode, KVM introduced the
> > > > KVM_ARM_VCPU_HAS_EL2_E2H0 feature flag, which will allow control over
> > > > HCR_EL2.E2H, but at the cost of turning off FEAT_NV2.
> > > 
> > > All of that has been captured at length in the kernel code, and I
> > > think this is "too much information" for userspace. I'd rather we
> > > stick to a pure description of what the various options mean to the
> > > user.
> > > 
> > > > Add a kvmtool command line option "--e2h0" to set that feature bit when
> > > > creating a guest, to gain non-VHE, but lose recursive nested virt.
> > > 
> > > How about:
> > > 
> > > "The --nested option allows a guest to boot at EL2 without FEAT_E2H0
> > >  (i.e. mandating VHE support). While this is great for "modern"
> > >  operating systems and hypervisors, a few legacy guests are stuck in a
> > >  distant past.
> > > 
> > >  To support those, the --e2h0 option exposes FEAT_E2H0 to the guest,
> > >  at the expense of a number of other features, such as FEAT_NV2. This
> > 
> > Just a very small thing:
> > 
> > Will only mentioning FEAT_NV2 here lead people to think that FEAT_NV is
> > still available with --e2h0?
> > Maybe s/FEAT_NV2/FEAT_NV/ makes it clearer?
> 
> Maybe. On the other hand, we never advertise the old FEAT_NV as such,
> irrespective of the state of E2H. This is indicated by
> ID_AA64MMFR4_EL1.NV_frac==0b0001 when NV is advertised. So I'm not
> sure this changes anything, really.

Right, thanks for the input. Even if the user doesn't know what's up, the L1
hypervisor will when it checks ID_AA64MMFR4_EL1 :)

Thanks,
Wei-Lin Chang

> 
> Thanks,
> 
> 	M.
> 
> 
> -- 
> Jazz isn't dead. It just smells funny.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH kvmtool v2 5/6] arm64: add FEAT_E2H0 support (TBC)
  2025-07-26 10:11         ` Wei-Lin Chang
@ 2025-07-26 10:34           ` Marc Zyngier
  0 siblings, 0 replies; 13+ messages in thread
From: Marc Zyngier @ 2025-07-26 10:34 UTC (permalink / raw)
  To: Wei-Lin Chang
  Cc: Andre Przywara, Will Deacon, Julien Thierry, kvm, kvmarm,
	Alexandru Elisei

On Sat, 26 Jul 2025 11:11:43 +0100,
Wei-Lin Chang <r09922117@csie.ntu.edu.tw> wrote:
> 
> On Sat, Jul 26, 2025 at 10:19:11AM +0100, Marc Zyngier wrote:
> > On Sat, 26 Jul 2025 10:01:25 +0100,
> > Wei-Lin Chang <r09922117@csie.ntu.edu.tw> wrote:
> > > 
> > > Hi all,
> > > 
> > > On Fri, Jul 25, 2025 at 05:37:12PM +0100, Marc Zyngier wrote:
> > > > Hi Andre,
> > > > 
> > > > Thanks for picking this. A few nits below.
> > > > 
> > > > On Fri, 25 Jul 2025 15:40:59 +0100,
> > > > Andre Przywara <andre.przywara@arm.com> wrote:
> > > > > 
> > > > > From: Marc Zyngier <maz@kernel.org>
> > > > > 
> > > > > To reduce code complexity, KVM only supports nested virtualisation in
> > > > > VHE mode. So to allow recursive nested virtualisation, and be able to
> > > > > expose FEAT_NV2 to a guest, we must prevent a guest from turning off
> > > > > HCR_EL2.E2H, which is covered by not advertising the FEAT_E2H0 architecture
> > > > > feature.
> > > > > 
> > > > > To allow people to run a guest in non-VHE mode, KVM introduced the
> > > > > KVM_ARM_VCPU_HAS_EL2_E2H0 feature flag, which will allow control over
> > > > > HCR_EL2.E2H, but at the cost of turning off FEAT_NV2.
> > > > 
> > > > All of that has been captured at length in the kernel code, and I
> > > > think this is "too much information" for userspace. I'd rather we
> > > > stick to a pure description of what the various options mean to the
> > > > user.
> > > > 
> > > > > Add a kvmtool command line option "--e2h0" to set that feature bit when
> > > > > creating a guest, to gain non-VHE, but lose recursive nested virt.
> > > > 
> > > > How about:
> > > > 
> > > > "The --nested option allows a guest to boot at EL2 without FEAT_E2H0
> > > >  (i.e. mandating VHE support). While this is great for "modern"
> > > >  operating systems and hypervisors, a few legacy guests are stuck in a
> > > >  distant past.
> > > > 
> > > >  To support those, the --e2h0 option exposes FEAT_E2H0 to the guest,
> > > >  at the expense of a number of other features, such as FEAT_NV2. This
> > > 
> > > Just a very small thing:
> > > 
> > > Will only mentioning FEAT_NV2 here lead people to think that FEAT_NV is
> > > still available with --e2h0?
> > > Maybe s/FEAT_NV2/FEAT_NV/ makes it clearer?
> > 
> > Maybe. On the other hand, we never advertise the old FEAT_NV as such,
> > irrespective of the state of E2H. This is indicated by
> > ID_AA64MMFR4_EL1.NV_frac==0b0001 when NV is advertised. So I'm not
> > sure this changes anything, really.
> 
> Right, thanks for the input. Even if the user doesn't know what's up the L1
> hypervisor will when it checks ID_AA64MMFR4_EL1 :)

Exactly.

The idea behind this relaxation to the architecture was that SW that
is relying on FEAT_NV being advertised through ID_AA64MMFR2_EL1 would
find that the "old style NV" isn't supported, while SW that has
followed the architecture would also look at NV_frac, and find that
the support that matters actually exists.

It means that we potentially leave behind some hypervisors that
haven't caught up with NV_frac yet, but since we don't know of any
other NV-capable hypervisor that could run under KVM, that's not
really a problem! ;-)

Thanks,

	M.

-- 
Without deviation from the norm, progress is not possible.

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2025-07-26 10:34 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-07-25 14:40 [PATCH kvmtool v2 0/6] arm64: Nested virtualization support Andre Przywara
2025-07-25 14:40 ` [PATCH kvmtool v2 1/6] Sync kernel UAPI headers with v6.16-rc1 Andre Przywara
2025-07-25 14:40 ` [PATCH kvmtool v2 2/6] arm64: Initial nested virt support Andre Przywara
2025-07-25 14:40 ` [PATCH kvmtool v2 3/6] arm64: nested: add support for setting maintenance IRQ Andre Przywara
2025-07-25 14:40 ` [PATCH kvmtool v2 4/6] arm64: Add counter offset control Andre Przywara
2025-07-26  9:25   ` Marc Zyngier
2025-07-25 14:40 ` [PATCH kvmtool v2 5/6] arm64: add FEAT_E2H0 support (TBC) Andre Przywara
2025-07-25 16:37   ` Marc Zyngier
2025-07-26  9:01     ` Wei-Lin Chang
2025-07-26  9:19       ` Marc Zyngier
2025-07-26 10:11         ` Wei-Lin Chang
2025-07-26 10:34           ` Marc Zyngier
2025-07-25 14:41 ` [PATCH kvmtool v2 6/6] arm64: Generate HYP timer interrupt specifiers Andre Przywara

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).