* [PATCH kvmtool v6 1/6] arm64: Initial nested virt support
2026-02-11 13:12 [PATCH kvmtool v6 0/6] arm64: Nested virtualization support Andre Przywara
@ 2026-02-11 13:12 ` Andre Przywara
2026-02-11 13:12 ` [PATCH kvmtool v6 2/6] arm64: nested: Add support for setting maintenance IRQ Andre Przywara
` (4 subsequent siblings)
5 siblings, 0 replies; 9+ messages in thread
From: Andre Przywara @ 2026-02-11 13:12 UTC (permalink / raw)
To: Will Deacon, Julien Thierry
Cc: Marc Zyngier, Sascha Bischoff, kvm, kvmarm, Alexandru Elisei
The ARMv8.3 architecture update includes support for nested
virtualization. Allow the user to specify "--nested" to start a guest in
(virtual) EL2 instead of EL1.
This will also change the PSCI conduit from HVC to SMC in the device
tree.
Signed-off-by: Andre Przywara <andre.przywara@arm.com>
Reviewed-by: Sascha Bischoff <sascha.bischoff@arm.com>
---
arm64/fdt.c | 5 ++++-
arm64/include/kvm/kvm-config-arch.h | 5 ++++-
arm64/kvm-cpu.c | 12 +++++++++++-
3 files changed, 19 insertions(+), 3 deletions(-)
diff --git a/arm64/fdt.c b/arm64/fdt.c
index df77758..98f1dd9 100644
--- a/arm64/fdt.c
+++ b/arm64/fdt.c
@@ -205,7 +205,10 @@ static int setup_fdt(struct kvm *kvm)
_FDT(fdt_property_string(fdt, "compatible", "arm,psci"));
fns = &psci_0_1_fns;
}
- _FDT(fdt_property_string(fdt, "method", "hvc"));
+ if (kvm->cfg.arch.nested_virt)
+ _FDT(fdt_property_string(fdt, "method", "smc"));
+ else
+ _FDT(fdt_property_string(fdt, "method", "hvc"));
_FDT(fdt_property_cell(fdt, "cpu_suspend", fns->cpu_suspend));
_FDT(fdt_property_cell(fdt, "cpu_off", fns->cpu_off));
_FDT(fdt_property_cell(fdt, "cpu_on", fns->cpu_on));
diff --git a/arm64/include/kvm/kvm-config-arch.h b/arm64/include/kvm/kvm-config-arch.h
index ee031f0..a1dac28 100644
--- a/arm64/include/kvm/kvm-config-arch.h
+++ b/arm64/include/kvm/kvm-config-arch.h
@@ -10,6 +10,7 @@ struct kvm_config_arch {
bool aarch32_guest;
bool has_pmuv3;
bool mte_disabled;
+ bool nested_virt;
u64 kaslr_seed;
enum irqchip_type irqchip;
u64 fw_addr;
@@ -57,6 +58,8 @@ int sve_vl_parser(const struct option *opt, const char *arg, int unset);
"Type of interrupt controller to emulate in the guest", \
irqchip_parser, NULL), \
OPT_U64('\0', "firmware-address", &(cfg)->fw_addr, \
- "Address where firmware should be loaded"),
+ "Address where firmware should be loaded"), \
+ OPT_BOOLEAN('\0', "nested", &(cfg)->nested_virt, \
+ "Start VCPUs in EL2 (for nested virt)"),
#endif /* ARM_COMMON__KVM_CONFIG_ARCH_H */
diff --git a/arm64/kvm-cpu.c b/arm64/kvm-cpu.c
index 94c08a4..42dc11d 100644
--- a/arm64/kvm-cpu.c
+++ b/arm64/kvm-cpu.c
@@ -71,6 +71,12 @@ static void kvm_cpu__select_features(struct kvm *kvm, struct kvm_vcpu_init *init
/* Enable SVE if available */
if (kvm__supports_extension(kvm, KVM_CAP_ARM_SVE))
init->features[0] |= 1UL << KVM_ARM_VCPU_SVE;
+
+ if (kvm->cfg.arch.nested_virt) {
+ if (!kvm__supports_extension(kvm, KVM_CAP_ARM_EL2))
+ die("EL2 (nested virt) is not supported");
+ init->features[0] |= 1UL << KVM_ARM_VCPU_HAS_EL2;
+ }
}
static int vcpu_configure_sve(struct kvm_cpu *vcpu)
@@ -313,7 +319,11 @@ static void reset_vcpu_aarch64(struct kvm_cpu *vcpu)
reg.addr = (u64)&data;
/* pstate = all interrupts masked */
- data = PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | PSR_MODE_EL1h;
+ data = PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT;
+ if (vcpu->kvm->cfg.arch.nested_virt)
+ data |= PSR_MODE_EL2h;
+ else
+ data |= PSR_MODE_EL1h;
reg.id = ARM64_CORE_REG(regs.pstate);
if (ioctl(vcpu->vcpu_fd, KVM_SET_ONE_REG, &reg) < 0)
die_perror("KVM_SET_ONE_REG failed (spsr[EL1])");
--
2.47.3
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH kvmtool v6 2/6] arm64: nested: Add support for setting maintenance IRQ
2026-02-11 13:12 [PATCH kvmtool v6 0/6] arm64: Nested virtualization support Andre Przywara
2026-02-11 13:12 ` [PATCH kvmtool v6 1/6] arm64: Initial nested virt support Andre Przywara
@ 2026-02-11 13:12 ` Andre Przywara
2026-02-11 13:12 ` [PATCH kvmtool v6 3/6] arm64: Add counter offset control Andre Przywara
` (3 subsequent siblings)
5 siblings, 0 replies; 9+ messages in thread
From: Andre Przywara @ 2026-02-11 13:12 UTC (permalink / raw)
To: Will Deacon, Julien Thierry
Cc: Marc Zyngier, Sascha Bischoff, kvm, kvmarm, Alexandru Elisei
Uses the new VGIC KVM device attribute to set the maintenance IRQ.
This is fixed to use PPI 9, as a platform decision made by kvmtool,
matching the SBSA recommendation.
Use the opportunity to pass the kvm pointer to gic__generate_fdt_nodes(),
as this simplifies the call and allows us access to the nested_virt
config variable on the way.
Signed-off-by: Andre Przywara <andre.przywara@arm.com>
---
arm64/arm-cpu.c | 2 +-
arm64/gic.c | 29 +++++++++++++++++++++++++++--
arm64/include/kvm/gic.h | 2 +-
3 files changed, 29 insertions(+), 4 deletions(-)
diff --git a/arm64/arm-cpu.c b/arm64/arm-cpu.c
index 69bb2cb..0843ac0 100644
--- a/arm64/arm-cpu.c
+++ b/arm64/arm-cpu.c
@@ -14,7 +14,7 @@ static void generate_fdt_nodes(void *fdt, struct kvm *kvm)
{
int timer_interrupts[4] = {13, 14, 11, 10};
- gic__generate_fdt_nodes(fdt, kvm->cfg.arch.irqchip);
+ gic__generate_fdt_nodes(fdt, kvm);
timer__generate_fdt_nodes(fdt, kvm, timer_interrupts);
pmu__generate_fdt_nodes(fdt, kvm);
}
diff --git a/arm64/gic.c b/arm64/gic.c
index b0d3a1a..b0be9e5 100644
--- a/arm64/gic.c
+++ b/arm64/gic.c
@@ -11,6 +11,8 @@
#define IRQCHIP_GIC 0
+#define GIC_MAINT_IRQ 9
+
static int gic_fd = -1;
static u64 gic_redists_base;
static u64 gic_redists_size;
@@ -302,10 +304,15 @@ static int gic__init_gic(struct kvm *kvm)
int lines = irq__get_nr_allocated_lines();
u32 nr_irqs = ALIGN(lines, 32) + GIC_SPI_IRQ_BASE;
+ u32 maint_irq = GIC_PPI_IRQ_BASE + GIC_MAINT_IRQ;
struct kvm_device_attr nr_irqs_attr = {
.group = KVM_DEV_ARM_VGIC_GRP_NR_IRQS,
.addr = (u64)(unsigned long)&nr_irqs,
};
+ struct kvm_device_attr maint_irq_attr = {
+ .group = KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ,
+ .addr = (u64)(unsigned long)&maint_irq,
+ };
struct kvm_device_attr vgic_init_attr = {
.group = KVM_DEV_ARM_VGIC_GRP_CTRL,
.attr = KVM_DEV_ARM_VGIC_CTRL_INIT,
@@ -325,6 +332,16 @@ static int gic__init_gic(struct kvm *kvm)
return ret;
}
+ if (kvm->cfg.arch.nested_virt) {
+ ret = ioctl(gic_fd, KVM_HAS_DEVICE_ATTR, &maint_irq_attr);
+ if (!ret)
+ ret = ioctl(gic_fd, KVM_SET_DEVICE_ATTR, &maint_irq_attr);
+ if (ret) {
+ pr_err("could not set maintenance IRQ\n");
+ return ret;
+ }
+ }
+
irq__routing_init(kvm);
if (!ioctl(gic_fd, KVM_HAS_DEVICE_ATTR, &vgic_init_attr)) {
@@ -342,7 +359,7 @@ static int gic__init_gic(struct kvm *kvm)
}
late_init(gic__init_gic)
-void gic__generate_fdt_nodes(void *fdt, enum irqchip_type type)
+void gic__generate_fdt_nodes(void *fdt, struct kvm *kvm)
{
const char *compatible, *msi_compatible = NULL;
u64 msi_prop[2];
@@ -350,8 +367,12 @@ void gic__generate_fdt_nodes(void *fdt, enum irqchip_type type)
cpu_to_fdt64(ARM_GIC_DIST_BASE), cpu_to_fdt64(ARM_GIC_DIST_SIZE),
0, 0, /* to be filled */
};
+ u32 maint_irq[] = {
+ cpu_to_fdt32(GIC_FDT_IRQ_TYPE_PPI), cpu_to_fdt32(GIC_MAINT_IRQ),
+ cpu_to_fdt32(gic__get_fdt_irq_cpumask(kvm) | IRQ_TYPE_LEVEL_HIGH)
+ };
- switch (type) {
+ switch (kvm->cfg.arch.irqchip) {
case IRQCHIP_GICV2M:
msi_compatible = "arm,gic-v2m-frame";
/* fall-through */
@@ -377,6 +398,10 @@ void gic__generate_fdt_nodes(void *fdt, enum irqchip_type type)
_FDT(fdt_property_cell(fdt, "#interrupt-cells", GIC_FDT_IRQ_NUM_CELLS));
_FDT(fdt_property(fdt, "interrupt-controller", NULL, 0));
_FDT(fdt_property(fdt, "reg", reg_prop, sizeof(reg_prop)));
+ if (kvm->cfg.arch.nested_virt) {
+ _FDT(fdt_property(fdt, "interrupts", maint_irq,
+ sizeof(maint_irq)));
+ }
_FDT(fdt_property_cell(fdt, "phandle", PHANDLE_GIC));
_FDT(fdt_property_cell(fdt, "#address-cells", 2));
_FDT(fdt_property_cell(fdt, "#size-cells", 2));
diff --git a/arm64/include/kvm/gic.h b/arm64/include/kvm/gic.h
index ad8bcbf..8490cca 100644
--- a/arm64/include/kvm/gic.h
+++ b/arm64/include/kvm/gic.h
@@ -36,7 +36,7 @@ struct kvm;
int gic__alloc_irqnum(void);
int gic__create(struct kvm *kvm, enum irqchip_type type);
int gic__create_gicv2m_frame(struct kvm *kvm, u64 msi_frame_addr);
-void gic__generate_fdt_nodes(void *fdt, enum irqchip_type type);
+void gic__generate_fdt_nodes(void *fdt, struct kvm *kvm);
u32 gic__get_fdt_irq_cpumask(struct kvm *kvm);
int gic__add_irqfd(struct kvm *kvm, unsigned int gsi, int trigger_fd,
--
2.47.3
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH kvmtool v6 3/6] arm64: Add counter offset control
2026-02-11 13:12 [PATCH kvmtool v6 0/6] arm64: Nested virtualization support Andre Przywara
2026-02-11 13:12 ` [PATCH kvmtool v6 1/6] arm64: Initial nested virt support Andre Przywara
2026-02-11 13:12 ` [PATCH kvmtool v6 2/6] arm64: nested: Add support for setting maintenance IRQ Andre Przywara
@ 2026-02-11 13:12 ` Andre Przywara
2026-02-11 13:12 ` [PATCH kvmtool v6 4/6] arm64: Add FEAT_E2H0 support Andre Przywara
` (2 subsequent siblings)
5 siblings, 0 replies; 9+ messages in thread
From: Andre Przywara @ 2026-02-11 13:12 UTC (permalink / raw)
To: Will Deacon, Julien Thierry
Cc: Marc Zyngier, Sascha Bischoff, kvm, kvmarm, Alexandru Elisei
From: Marc Zyngier <maz@kernel.org>
KVM allows the offsetting of the global counter in order to help with
migration of a VM. This offset applies cumulatively with the offsets
provided by the architecture.
Although kvmtool doesn't provide a way to migrate a VM, controlling
this offset is useful to test the timer subsystem.
Add the command line option --counter-offset to allow setting this value
when creating a VM.
Signed-off-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Andre Przywara <andre.przywara@arm.com>
Reviewed-by: Sascha Bischoff <sascha.bischoff@arm.com>
---
arm64/include/kvm/kvm-config-arch.h | 3 +++
arm64/kvm.c | 17 +++++++++++++++++
2 files changed, 20 insertions(+)
diff --git a/arm64/include/kvm/kvm-config-arch.h b/arm64/include/kvm/kvm-config-arch.h
index a1dac28..44c4336 100644
--- a/arm64/include/kvm/kvm-config-arch.h
+++ b/arm64/include/kvm/kvm-config-arch.h
@@ -14,6 +14,7 @@ struct kvm_config_arch {
u64 kaslr_seed;
enum irqchip_type irqchip;
u64 fw_addr;
+ u64 counter_offset;
unsigned int sve_max_vq;
bool no_pvtime;
};
@@ -59,6 +60,8 @@ int sve_vl_parser(const struct option *opt, const char *arg, int unset);
irqchip_parser, NULL), \
OPT_U64('\0', "firmware-address", &(cfg)->fw_addr, \
"Address where firmware should be loaded"), \
+ OPT_U64('\0', "counter-offset", &(cfg)->counter_offset, \
+ "Specify the counter offset, defaulting to 0"), \
OPT_BOOLEAN('\0', "nested", &(cfg)->nested_virt, \
"Start VCPUs in EL2 (for nested virt)"),
diff --git a/arm64/kvm.c b/arm64/kvm.c
index 23b4dab..6e971dd 100644
--- a/arm64/kvm.c
+++ b/arm64/kvm.c
@@ -119,6 +119,22 @@ static void kvm__arch_enable_mte(struct kvm *kvm)
pr_debug("MTE capability enabled");
}
+static void kvm__arch_set_counter_offset(struct kvm *kvm)
+{
+ struct kvm_arm_counter_offset offset = {
+ .counter_offset = kvm->cfg.arch.counter_offset,
+ };
+
+ if (!kvm->cfg.arch.counter_offset)
+ return;
+
+ if (!kvm__supports_extension(kvm, KVM_CAP_COUNTER_OFFSET))
+ die("No support for global counter offset");
+
+ if (ioctl(kvm->vm_fd, KVM_ARM_SET_COUNTER_OFFSET, &offset))
+ die_perror("KVM_ARM_SET_COUNTER_OFFSET");
+}
+
void kvm__arch_init(struct kvm *kvm)
{
/* Create the virtual GIC. */
@@ -126,6 +142,7 @@ void kvm__arch_init(struct kvm *kvm)
die("Failed to create virtual GIC");
kvm__arch_enable_mte(kvm);
+ kvm__arch_set_counter_offset(kvm);
}
static u64 kvm__arch_get_payload_region_size(struct kvm *kvm)
--
2.47.3
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH kvmtool v6 4/6] arm64: Add FEAT_E2H0 support
2026-02-11 13:12 [PATCH kvmtool v6 0/6] arm64: Nested virtualization support Andre Przywara
` (2 preceding siblings ...)
2026-02-11 13:12 ` [PATCH kvmtool v6 3/6] arm64: Add counter offset control Andre Przywara
@ 2026-02-11 13:12 ` Andre Przywara
2026-02-11 13:12 ` [PATCH kvmtool v6 5/6] arm64: Generate HYP timer interrupt specifiers Andre Przywara
2026-02-11 13:12 ` [PATCH kvmtool v6 6/6] arm64: Handle virtio endianness reset when running nested Andre Przywara
5 siblings, 0 replies; 9+ messages in thread
From: Andre Przywara @ 2026-02-11 13:12 UTC (permalink / raw)
To: Will Deacon, Julien Thierry
Cc: Marc Zyngier, Sascha Bischoff, kvm, kvmarm, Alexandru Elisei
From: Marc Zyngier <maz@kernel.org>
The --nested option allows a guest to boot at EL2 without FEAT_E2H0
(i.e. mandating VHE support). While this is great for "modern" operating
systems and hypervisors, a few legacy guests are stuck in a distant past.
To support those, add the --e2h0 command line option, that exposes
FEAT_E2H0 to the guest, at the expense of a number of other features, such
as FEAT_NV2. This is conditioned on the host itself supporting FEAT_E2H0.
Signed-off-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Andre Przywara <andre.przywara@arm.com>
Reviewed-by: Sascha Bischoff <sascha.bischoff@arm.com>
---
arm64/include/kvm/kvm-config-arch.h | 5 ++++-
arm64/kvm-cpu.c | 5 +++++
arm64/kvm.c | 3 +++
3 files changed, 12 insertions(+), 1 deletion(-)
diff --git a/arm64/include/kvm/kvm-config-arch.h b/arm64/include/kvm/kvm-config-arch.h
index 44c4336..73bf421 100644
--- a/arm64/include/kvm/kvm-config-arch.h
+++ b/arm64/include/kvm/kvm-config-arch.h
@@ -11,6 +11,7 @@ struct kvm_config_arch {
bool has_pmuv3;
bool mte_disabled;
bool nested_virt;
+ bool e2h0;
u64 kaslr_seed;
enum irqchip_type irqchip;
u64 fw_addr;
@@ -63,6 +64,8 @@ int sve_vl_parser(const struct option *opt, const char *arg, int unset);
OPT_U64('\0', "counter-offset", &(cfg)->counter_offset, \
"Specify the counter offset, defaulting to 0"), \
OPT_BOOLEAN('\0', "nested", &(cfg)->nested_virt, \
- "Start VCPUs in EL2 (for nested virt)"),
+ "Start VCPUs in EL2 (for nested virt)"), \
+ OPT_BOOLEAN('\0', "e2h0", &(cfg)->e2h0, \
+ "Create guest without VHE support"),
#endif /* ARM_COMMON__KVM_CONFIG_ARCH_H */
diff --git a/arm64/kvm-cpu.c b/arm64/kvm-cpu.c
index 42dc11d..5e4f3a7 100644
--- a/arm64/kvm-cpu.c
+++ b/arm64/kvm-cpu.c
@@ -76,6 +76,11 @@ static void kvm_cpu__select_features(struct kvm *kvm, struct kvm_vcpu_init *init
if (!kvm__supports_extension(kvm, KVM_CAP_ARM_EL2))
die("EL2 (nested virt) is not supported");
init->features[0] |= 1UL << KVM_ARM_VCPU_HAS_EL2;
+ if (kvm->cfg.arch.e2h0) {
+ if (!kvm__supports_extension(kvm, KVM_CAP_ARM_EL2_E2H0))
+ die("FEAT_E2H0 is not supported");
+ init->features[0] |= 1UL << KVM_ARM_VCPU_HAS_EL2_E2H0;
+ }
}
}
diff --git a/arm64/kvm.c b/arm64/kvm.c
index 6e971dd..4ce2493 100644
--- a/arm64/kvm.c
+++ b/arm64/kvm.c
@@ -440,6 +440,9 @@ void kvm__arch_validate_cfg(struct kvm *kvm)
kvm->cfg.ram_addr + kvm->cfg.ram_size > SZ_4G) {
die("RAM extends above 4GB");
}
+
+ if (kvm->cfg.arch.e2h0 && !kvm->cfg.arch.nested_virt)
+ pr_warning("--e2h0 requires --nested, ignoring");
}
u64 kvm__arch_default_ram_address(void)
--
2.47.3
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH kvmtool v6 5/6] arm64: Generate HYP timer interrupt specifiers
2026-02-11 13:12 [PATCH kvmtool v6 0/6] arm64: Nested virtualization support Andre Przywara
` (3 preceding siblings ...)
2026-02-11 13:12 ` [PATCH kvmtool v6 4/6] arm64: Add FEAT_E2H0 support Andre Przywara
@ 2026-02-11 13:12 ` Andre Przywara
2026-02-11 13:12 ` [PATCH kvmtool v6 6/6] arm64: Handle virtio endianness reset when running nested Andre Przywara
5 siblings, 0 replies; 9+ messages in thread
From: Andre Przywara @ 2026-02-11 13:12 UTC (permalink / raw)
To: Will Deacon, Julien Thierry
Cc: Marc Zyngier, Sascha Bischoff, kvm, kvmarm, Alexandru Elisei
From: Marc Zyngier <maz@kernel.org>
FEAT_VHE introduced a non-secure EL2 virtual timer, along with its
interrupt line. Consequently the arch timer DT binding introduced a fifth
interrupt to communicate this interrupt number.
Refactor the interrupts property generation code to deal with a variable
number of interrupts, and forward five interrupts instead of four in case
nested virt is enabled.
Signed-off-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Andre Przywara <andre.przywara@arm.com>
Reviewed-by: Sascha Bischoff <sascha.bischoff@arm.com>
---
arm64/arm-cpu.c | 4 +---
arm64/include/kvm/timer.h | 2 +-
arm64/timer.c | 29 ++++++++++++-----------------
3 files changed, 14 insertions(+), 21 deletions(-)
diff --git a/arm64/arm-cpu.c b/arm64/arm-cpu.c
index 0843ac0..5b5484d 100644
--- a/arm64/arm-cpu.c
+++ b/arm64/arm-cpu.c
@@ -12,10 +12,8 @@
static void generate_fdt_nodes(void *fdt, struct kvm *kvm)
{
- int timer_interrupts[4] = {13, 14, 11, 10};
-
gic__generate_fdt_nodes(fdt, kvm);
- timer__generate_fdt_nodes(fdt, kvm, timer_interrupts);
+ timer__generate_fdt_nodes(fdt, kvm);
pmu__generate_fdt_nodes(fdt, kvm);
}
diff --git a/arm64/include/kvm/timer.h b/arm64/include/kvm/timer.h
index 928e9ea..81e093e 100644
--- a/arm64/include/kvm/timer.h
+++ b/arm64/include/kvm/timer.h
@@ -1,6 +1,6 @@
#ifndef ARM_COMMON__TIMER_H
#define ARM_COMMON__TIMER_H
-void timer__generate_fdt_nodes(void *fdt, struct kvm *kvm, int *irqs);
+void timer__generate_fdt_nodes(void *fdt, struct kvm *kvm);
#endif /* ARM_COMMON__TIMER_H */
diff --git a/arm64/timer.c b/arm64/timer.c
index 861f2d9..2ac6144 100644
--- a/arm64/timer.c
+++ b/arm64/timer.c
@@ -5,31 +5,26 @@
#include "kvm/timer.h"
#include "kvm/util.h"
-void timer__generate_fdt_nodes(void *fdt, struct kvm *kvm, int *irqs)
+void timer__generate_fdt_nodes(void *fdt, struct kvm *kvm)
{
const char compatible[] = "arm,armv8-timer\0arm,armv7-timer";
u32 cpu_mask = gic__get_fdt_irq_cpumask(kvm);
- u32 irq_prop[] = {
- cpu_to_fdt32(GIC_FDT_IRQ_TYPE_PPI),
- cpu_to_fdt32(irqs[0]),
- cpu_to_fdt32(cpu_mask | IRQ_TYPE_LEVEL_LOW),
+ int irqs[5] = {13, 14, 11, 10, 12};
+ int nr = ARRAY_SIZE(irqs);
+ u32 irq_prop[nr * 3];
- cpu_to_fdt32(GIC_FDT_IRQ_TYPE_PPI),
- cpu_to_fdt32(irqs[1]),
- cpu_to_fdt32(cpu_mask | IRQ_TYPE_LEVEL_LOW),
+ if (!kvm->cfg.arch.nested_virt)
+ nr--;
- cpu_to_fdt32(GIC_FDT_IRQ_TYPE_PPI),
- cpu_to_fdt32(irqs[2]),
- cpu_to_fdt32(cpu_mask | IRQ_TYPE_LEVEL_LOW),
-
- cpu_to_fdt32(GIC_FDT_IRQ_TYPE_PPI),
- cpu_to_fdt32(irqs[3]),
- cpu_to_fdt32(cpu_mask | IRQ_TYPE_LEVEL_LOW),
- };
+ for (int i = 0; i < nr; i++) {
+ irq_prop[i * 3 + 0] = cpu_to_fdt32(GIC_FDT_IRQ_TYPE_PPI);
+ irq_prop[i * 3 + 1] = cpu_to_fdt32(irqs[i]);
+ irq_prop[i * 3 + 2] = cpu_to_fdt32(cpu_mask | IRQ_TYPE_LEVEL_LOW);
+ }
_FDT(fdt_begin_node(fdt, "timer"));
_FDT(fdt_property(fdt, "compatible", compatible, sizeof(compatible)));
- _FDT(fdt_property(fdt, "interrupts", irq_prop, sizeof(irq_prop)));
+ _FDT(fdt_property(fdt, "interrupts", irq_prop, nr * 3 * sizeof(irq_prop[0])));
_FDT(fdt_property(fdt, "always-on", NULL, 0));
if (kvm->cfg.arch.force_cntfrq > 0)
_FDT(fdt_property_cell(fdt, "clock-frequency", kvm->cfg.arch.force_cntfrq));
--
2.47.3
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH kvmtool v6 6/6] arm64: Handle virtio endianness reset when running nested
2026-02-11 13:12 [PATCH kvmtool v6 0/6] arm64: Nested virtualization support Andre Przywara
` (4 preceding siblings ...)
2026-02-11 13:12 ` [PATCH kvmtool v6 5/6] arm64: Generate HYP timer interrupt specifiers Andre Przywara
@ 2026-02-11 13:12 ` Andre Przywara
2026-02-11 14:08 ` Marc Zyngier
5 siblings, 1 reply; 9+ messages in thread
From: Andre Przywara @ 2026-02-11 13:12 UTC (permalink / raw)
To: Will Deacon, Julien Thierry
Cc: Marc Zyngier, Sascha Bischoff, kvm, kvmarm, Alexandru Elisei
From: Marc Zyngier <maz@kernel.org>
When running an EL2 guest, we need to make sure we don't sample
SCTLR_EL1 to work out the virtio endianness, as this is likely
to be a bit random.
Signed-off-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Andre Przywara <andre.przywara@arm.com>
---
arm64/include/kvm/kvm-cpu-arch.h | 5 ++--
arm64/kvm-cpu.c | 47 +++++++++++++++++++++++++-------
2 files changed, 40 insertions(+), 12 deletions(-)
diff --git a/arm64/include/kvm/kvm-cpu-arch.h b/arm64/include/kvm/kvm-cpu-arch.h
index 1af394a..85646ad 100644
--- a/arm64/include/kvm/kvm-cpu-arch.h
+++ b/arm64/include/kvm/kvm-cpu-arch.h
@@ -10,8 +10,9 @@
#define ARM_MPIDR_HWID_BITMASK 0xFF00FFFFFFUL
#define ARM_CPU_ID 3, 0, 0, 0
#define ARM_CPU_ID_MPIDR 5
-#define ARM_CPU_CTRL 3, 0, 1, 0
-#define ARM_CPU_CTRL_SCTLR_EL1 0
+#define SYS_SCTLR_EL1 3, 4, 1, 0, 0
+#define SYS_SCTLR_EL2 3, 4, 1, 0, 0
+#define SYS_HCR_EL2 3, 4, 1, 1, 0
struct kvm_cpu {
pthread_t thread;
diff --git a/arm64/kvm-cpu.c b/arm64/kvm-cpu.c
index 5e4f3a7..35e1c63 100644
--- a/arm64/kvm-cpu.c
+++ b/arm64/kvm-cpu.c
@@ -12,6 +12,7 @@
#define SCTLR_EL1_E0E_MASK (1 << 24)
#define SCTLR_EL1_EE_MASK (1 << 25)
+#define HCR_EL2_TGE (1 << 27)
static int debug_fd;
@@ -408,7 +409,8 @@ int kvm_cpu__get_endianness(struct kvm_cpu *vcpu)
{
struct kvm_one_reg reg;
u64 psr;
- u64 sctlr;
+ u64 sctlr, bit;
+ u64 hcr = 0;
/*
* Quoting the definition given by Peter Maydell:
@@ -419,8 +421,9 @@ int kvm_cpu__get_endianness(struct kvm_cpu *vcpu)
* We first check for an AArch32 guest: its endianness can
* change when using SETEND, which affects the CPSR.E bit.
*
- * If we're AArch64, use SCTLR_EL1.E0E if access comes from
- * EL0, and SCTLR_EL1.EE if access comes from EL1.
+ * If we're AArch64, determine which SCTLR register to use,
+ * depending on NV being used or not. Then use either the E0E
+ * bit for EL0, or the EE bit for EL1/EL2.
*/
reg.id = ARM64_CORE_REG(regs.pstate);
reg.addr = (u64)&psr;
@@ -430,16 +433,40 @@ int kvm_cpu__get_endianness(struct kvm_cpu *vcpu)
if (psr & PSR_MODE32_BIT)
return (psr & COMPAT_PSR_E_BIT) ? VIRTIO_ENDIAN_BE : VIRTIO_ENDIAN_LE;
- reg.id = ARM64_SYS_REG(ARM_CPU_CTRL, ARM_CPU_CTRL_SCTLR_EL1);
+ if (vcpu->kvm->cfg.arch.nested_virt) {
+ reg.id = ARM64_SYS_REG(SYS_HCR_EL2);
+ reg.addr = (u64)&hcr;
+ if (ioctl(vcpu->vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)
+ die("KVM_GET_ONE_REG failed (HCR_EL2)");
+ }
+
+ switch (psr & PSR_MODE_MASK) {
+ case PSR_MODE_EL0t:
+ if (hcr & HCR_EL2_TGE)
+ reg.id = ARM64_SYS_REG(SYS_SCTLR_EL2);
+ else
+ reg.id = ARM64_SYS_REG(SYS_SCTLR_EL1);
+ bit = SCTLR_EL1_E0E_MASK;
+ break;
+ case PSR_MODE_EL1t:
+ case PSR_MODE_EL1h:
+ reg.id = ARM64_SYS_REG(SYS_SCTLR_EL1);
+ bit = SCTLR_EL1_EE_MASK;
+ break;
+ case PSR_MODE_EL2t:
+ case PSR_MODE_EL2h:
+ reg.id = ARM64_SYS_REG(SYS_SCTLR_EL2);
+ bit = SCTLR_EL1_EE_MASK;
+ break;
+ default:
+ die("What's that mode???\n");
+ }
+
reg.addr = (u64)&sctlr;
if (ioctl(vcpu->vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)
- die("KVM_GET_ONE_REG failed (SCTLR_EL1)");
+ die("KVM_GET_ONE_REG failed (SCTLR_ELx)");
- if ((psr & PSR_MODE_MASK) == PSR_MODE_EL0t)
- sctlr &= SCTLR_EL1_E0E_MASK;
- else
- sctlr &= SCTLR_EL1_EE_MASK;
- return sctlr ? VIRTIO_ENDIAN_BE : VIRTIO_ENDIAN_LE;
+ return (sctlr & bit) ? VIRTIO_ENDIAN_BE : VIRTIO_ENDIAN_LE;
}
void kvm_cpu__show_code(struct kvm_cpu *vcpu)
--
2.47.3
^ permalink raw reply related [flat|nested] 9+ messages in thread* Re: [PATCH kvmtool v6 6/6] arm64: Handle virtio endianness reset when running nested
2026-02-11 13:12 ` [PATCH kvmtool v6 6/6] arm64: Handle virtio endianness reset when running nested Andre Przywara
@ 2026-02-11 14:08 ` Marc Zyngier
2026-03-23 15:31 ` Andre Przywara
0 siblings, 1 reply; 9+ messages in thread
From: Marc Zyngier @ 2026-02-11 14:08 UTC (permalink / raw)
To: Andre Przywara
Cc: Will Deacon, Julien Thierry, Sascha Bischoff, kvm, kvmarm,
Alexandru Elisei
On Wed, 11 Feb 2026 13:12:49 +0000,
Andre Przywara <andre.przywara@arm.com> wrote:
>
> From: Marc Zyngier <maz@kernel.org>
>
> When running an EL2 guest, we need to make sure we don't sample
> SCTLR_EL1 to work out the virtio endianness, as this is likely
> to be a bit random.
>
> Signed-off-by: Marc Zyngier <maz@kernel.org>
> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
> ---
> arm64/include/kvm/kvm-cpu-arch.h | 5 ++--
> arm64/kvm-cpu.c | 47 +++++++++++++++++++++++++-------
> 2 files changed, 40 insertions(+), 12 deletions(-)
>
> diff --git a/arm64/include/kvm/kvm-cpu-arch.h b/arm64/include/kvm/kvm-cpu-arch.h
> index 1af394a..85646ad 100644
> --- a/arm64/include/kvm/kvm-cpu-arch.h
> +++ b/arm64/include/kvm/kvm-cpu-arch.h
> @@ -10,8 +10,9 @@
> #define ARM_MPIDR_HWID_BITMASK 0xFF00FFFFFFUL
> #define ARM_CPU_ID 3, 0, 0, 0
> #define ARM_CPU_ID_MPIDR 5
> -#define ARM_CPU_CTRL 3, 0, 1, 0
> -#define ARM_CPU_CTRL_SCTLR_EL1 0
> +#define SYS_SCTLR_EL1 3, 4, 1, 0, 0
> +#define SYS_SCTLR_EL2 3, 4, 1, 0, 0
Sascha pointed out this howler of a bug last time: SCTLR_EL1 and EL2
have the same encoding here, which is obviously wrong.
This is definitely introducing a regression on EL1 guests.
> +#define SYS_HCR_EL2 3, 4, 1, 1, 0
>
> struct kvm_cpu {
> pthread_t thread;
> diff --git a/arm64/kvm-cpu.c b/arm64/kvm-cpu.c
> index 5e4f3a7..35e1c63 100644
> --- a/arm64/kvm-cpu.c
> +++ b/arm64/kvm-cpu.c
> @@ -12,6 +12,7 @@
>
> #define SCTLR_EL1_E0E_MASK (1 << 24)
> #define SCTLR_EL1_EE_MASK (1 << 25)
> +#define HCR_EL2_TGE (1 << 27)
>
> static int debug_fd;
>
> @@ -408,7 +409,8 @@ int kvm_cpu__get_endianness(struct kvm_cpu *vcpu)
> {
> struct kvm_one_reg reg;
> u64 psr;
> - u64 sctlr;
> + u64 sctlr, bit;
> + u64 hcr = 0;
>
> /*
> * Quoting the definition given by Peter Maydell:
> @@ -419,8 +421,9 @@ int kvm_cpu__get_endianness(struct kvm_cpu *vcpu)
> * We first check for an AArch32 guest: its endianness can
> * change when using SETEND, which affects the CPSR.E bit.
> *
> - * If we're AArch64, use SCTLR_EL1.E0E if access comes from
> - * EL0, and SCTLR_EL1.EE if access comes from EL1.
> + * If we're AArch64, determine which SCTLR register to use,
> + * depending on NV being used or not. Then use either the E0E
> + * bit for EL0, or the EE bit for EL1/EL2.
> */
> reg.id = ARM64_CORE_REG(regs.pstate);
> reg.addr = (u64)&psr;
> @@ -430,16 +433,40 @@ int kvm_cpu__get_endianness(struct kvm_cpu *vcpu)
> if (psr & PSR_MODE32_BIT)
> return (psr & COMPAT_PSR_E_BIT) ? VIRTIO_ENDIAN_BE : VIRTIO_ENDIAN_LE;
>
> - reg.id = ARM64_SYS_REG(ARM_CPU_CTRL, ARM_CPU_CTRL_SCTLR_EL1);
> + if (vcpu->kvm->cfg.arch.nested_virt) {
> + reg.id = ARM64_SYS_REG(SYS_HCR_EL2);
> + reg.addr = (u64)&hcr;
> + if (ioctl(vcpu->vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)
> + die("KVM_GET_ONE_REG failed (HCR_EL2)");
> + }
> +
> + switch (psr & PSR_MODE_MASK) {
> + case PSR_MODE_EL0t:
> + if (hcr & HCR_EL2_TGE)
> + reg.id = ARM64_SYS_REG(SYS_SCTLR_EL2);
> + else
> + reg.id = ARM64_SYS_REG(SYS_SCTLR_EL1);
> + bit = SCTLR_EL1_E0E_MASK;
> + break;
And this is also buggy, as I pointed out in my review of v5 -- I even
provided a fix for it [1].
M.
[1] https://lore.kernel.org/all/86jyx8b9l2.wl-maz@kernel.org/
--
Without deviation from the norm, progress is not possible.
^ permalink raw reply [flat|nested] 9+ messages in thread* Re: [PATCH kvmtool v6 6/6] arm64: Handle virtio endianness reset when running nested
2026-02-11 14:08 ` Marc Zyngier
@ 2026-03-23 15:31 ` Andre Przywara
0 siblings, 0 replies; 9+ messages in thread
From: Andre Przywara @ 2026-03-23 15:31 UTC (permalink / raw)
To: Marc Zyngier
Cc: Will Deacon, Julien Thierry, Sascha Bischoff, kvm, kvmarm,
Alexandru Elisei
Hi Marc,
On 2/11/26 15:08, Marc Zyngier wrote:
> On Wed, 11 Feb 2026 13:12:49 +0000,
> Andre Przywara <andre.przywara@arm.com> wrote:
>>
>> From: Marc Zyngier <maz@kernel.org>
>>
>> When running an EL2 guest, we need to make sure we don't sample
>> SCTLR_EL1 to work out the virtio endianness, as this is likely
>> to be a bit random.
>>
>> Signed-off-by: Marc Zyngier <maz@kernel.org>
>> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
>> ---
>> arm64/include/kvm/kvm-cpu-arch.h | 5 ++--
>> arm64/kvm-cpu.c | 47 +++++++++++++++++++++++++-------
>> 2 files changed, 40 insertions(+), 12 deletions(-)
>>
>> diff --git a/arm64/include/kvm/kvm-cpu-arch.h b/arm64/include/kvm/kvm-cpu-arch.h
>> index 1af394a..85646ad 100644
>> --- a/arm64/include/kvm/kvm-cpu-arch.h
>> +++ b/arm64/include/kvm/kvm-cpu-arch.h
>> @@ -10,8 +10,9 @@
>> #define ARM_MPIDR_HWID_BITMASK 0xFF00FFFFFFUL
>> #define ARM_CPU_ID 3, 0, 0, 0
>> #define ARM_CPU_ID_MPIDR 5
>> -#define ARM_CPU_CTRL 3, 0, 1, 0
>> -#define ARM_CPU_CTRL_SCTLR_EL1 0
>> +#define SYS_SCTLR_EL1 3, 4, 1, 0, 0
>> +#define SYS_SCTLR_EL2 3, 4, 1, 0, 0
>
> Sascha pointed out this howler of a bug last time: SCTLR_EL1 and EL2
> have the same encoding here, which is obviously wrong.
Argh, indeed, I now remember, this was probably in the category of
"that's a no brainer, what's next ..." so I missed actually fixing it.
Done now, apologies for that.
>
> This is definitely introducing a regression on EL1 guests.
>
>> +#define SYS_HCR_EL2 3, 4, 1, 1, 0
>>
>> struct kvm_cpu {
>> pthread_t thread;
>> diff --git a/arm64/kvm-cpu.c b/arm64/kvm-cpu.c
>> index 5e4f3a7..35e1c63 100644
>> --- a/arm64/kvm-cpu.c
>> +++ b/arm64/kvm-cpu.c
>> @@ -12,6 +12,7 @@
>>
>> #define SCTLR_EL1_E0E_MASK (1 << 24)
>> #define SCTLR_EL1_EE_MASK (1 << 25)
>> +#define HCR_EL2_TGE (1 << 27)
>>
>> static int debug_fd;
>>
>> @@ -408,7 +409,8 @@ int kvm_cpu__get_endianness(struct kvm_cpu *vcpu)
>> {
>> struct kvm_one_reg reg;
>> u64 psr;
>> - u64 sctlr;
>> + u64 sctlr, bit;
>> + u64 hcr = 0;
>>
>> /*
>> * Quoting the definition given by Peter Maydell:
>> @@ -419,8 +421,9 @@ int kvm_cpu__get_endianness(struct kvm_cpu *vcpu)
>> * We first check for an AArch32 guest: its endianness can
>> * change when using SETEND, which affects the CPSR.E bit.
>> *
>> - * If we're AArch64, use SCTLR_EL1.E0E if access comes from
>> - * EL0, and SCTLR_EL1.EE if access comes from EL1.
>> + * If we're AArch64, determine which SCTLR register to use,
>> + * depending on NV being used or not. Then use either the E0E
>> + * bit for EL0, or the EE bit for EL1/EL2.
>> */
>> reg.id = ARM64_CORE_REG(regs.pstate);
>> reg.addr = (u64)&psr;
>> @@ -430,16 +433,40 @@ int kvm_cpu__get_endianness(struct kvm_cpu *vcpu)
>> if (psr & PSR_MODE32_BIT)
>> return (psr & COMPAT_PSR_E_BIT) ? VIRTIO_ENDIAN_BE : VIRTIO_ENDIAN_LE;
>>
>> - reg.id = ARM64_SYS_REG(ARM_CPU_CTRL, ARM_CPU_CTRL_SCTLR_EL1);
>> + if (vcpu->kvm->cfg.arch.nested_virt) {
>> + reg.id = ARM64_SYS_REG(SYS_HCR_EL2);
>> + reg.addr = (u64)&hcr;
>> + if (ioctl(vcpu->vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)
>> + die("KVM_GET_ONE_REG failed (HCR_EL2)");
>> + }
>> +
>> + switch (psr & PSR_MODE_MASK) {
>> + case PSR_MODE_EL0t:
>> + if (hcr & HCR_EL2_TGE)
>> + reg.id = ARM64_SYS_REG(SYS_SCTLR_EL2);
>> + else
>> + reg.id = ARM64_SYS_REG(SYS_SCTLR_EL1);
>> + bit = SCTLR_EL1_E0E_MASK;
>> + break;
>
> And this is also buggy, as I pointed out in my review of v5 -- I even
> provided a fix for it [1].
Right, applied now.
v7 incoming.
Cheers,
Andre
>
> M.
>
> [1] https://lore.kernel.org/all/86jyx8b9l2.wl-maz@kernel.org/
>
^ permalink raw reply [flat|nested] 9+ messages in thread