* [kvm-unit-tests PATCH v1 0/3] arm64: Add Stage-2 MMU and Nested Guest Framework
@ 2026-03-16 22:43 Jing Zhang
2026-03-16 22:43 ` [kvm-unit-tests PATCH v1 1/3] lib: arm64: Add stage2 page table management library Jing Zhang
` (3 more replies)
0 siblings, 4 replies; 10+ messages in thread
From: Jing Zhang @ 2026-03-16 22:43 UTC (permalink / raw)
To: KVM, KVMARM
Cc: Marc Zyngier, Joey Gouly, Andrew Jones, Alexandru Elisei,
Oliver Upton, Jing Zhang
This patch series introduces a lightweight infrastructure for managing ARM64
Stage-2 translation tables and executing nested guests. These components are
essential for testing advanced virtualization features such as nested
virtualization (NV) and GICv4 direct interrupt injection.
The series provides a generic Stage-2 MMU library supporting multiple
translation granules (4K, 16K, 64K) and dynamic page table management.
Building on this, it adds a guest execution framework that handles guest
lifecycle management, context switching and guest exit routing. A new test
case for Stage-2 MMU demand paging is added to verify fault handling.
Please note that this is a very preliminary implementation intended as a
startup baseline for future work in virtualization testing. Users should be
aware that because this is an early-stage baseline, some portions of the code
may just happen to work in their current state. There might be critical
architectural elements or edge-case handling missing that will need to be
addressed as the framework matures.
---
Jing Zhang (3):
lib: arm64: Add stage2 page table management library
lib: arm64: Add bare-metal guest execution framework
arm64: Add Stage-2 MMU demand paging test
arm/Makefile.arm64 | 4 +
arm/stage2-mmu-test.c | 100 +++++++++
lib/arm64/asm/guest.h | 156 ++++++++++++++
lib/arm64/asm/stage2_mmu.h | 74 +++++++
lib/arm64/guest.c | 197 ++++++++++++++++++
lib/arm64/guest_arch.S | 263 ++++++++++++++++++++++++
lib/arm64/stage2_mmu.c | 402 +++++++++++++++++++++++++++++++++++++
7 files changed, 1196 insertions(+)
create mode 100644 arm/stage2-mmu-test.c
create mode 100644 lib/arm64/asm/guest.h
create mode 100644 lib/arm64/asm/stage2_mmu.h
create mode 100644 lib/arm64/guest.c
create mode 100644 lib/arm64/guest_arch.S
create mode 100644 lib/arm64/stage2_mmu.c
base-commit: 86e53277ac80dabb04f4fa5fa6a6cc7649392bdc
--
2.53.0.851.ga537e3e6e9-goog
^ permalink raw reply [flat|nested] 10+ messages in thread
* [kvm-unit-tests PATCH v1 1/3] lib: arm64: Add stage2 page table management library
2026-03-16 22:43 [kvm-unit-tests PATCH v1 0/3] arm64: Add Stage-2 MMU and Nested Guest Framework Jing Zhang
@ 2026-03-16 22:43 ` Jing Zhang
2026-03-24 15:12 ` Wei-Lin Chang
2026-03-16 22:43 ` [kvm-unit-tests PATCH v1 2/3] lib: arm64: Add bare-metal guest execution framework Jing Zhang
` (2 subsequent siblings)
3 siblings, 1 reply; 10+ messages in thread
From: Jing Zhang @ 2026-03-16 22:43 UTC (permalink / raw)
To: KVM, KVMARM
Cc: Marc Zyngier, Joey Gouly, Andrew Jones, Alexandru Elisei,
Oliver Upton, Jing Zhang
Tests running at EL2 (hypervisor level) often require the ability to
manage Stage 2 translation tables to control Intermediate Physical Address (IPA)
to Host Physical Address (PA) translation.
Add a generic Stage 2 MMU library that provides software management of
ARM64 Stage 2 translation tables.
The library features include:
- Support for 4K, 16K, and 64K translation granules.
- Dynamic page table allocation using the allocator.
- Support for 2M block mappings where applicable.
- APIs for mapping, unmapping, enabling, and disabling the Stage 2 MMU.
- Basic fault info reporting (ESR, FAR, HPFAR).
This infrastructure is necessary for upcoming virtualization and
hypervisor-mode tests.
Signed-off-by: Jing Zhang <jingzhangos@google.com>
---
arm/Makefile.arm64 | 1 +
lib/arm64/asm/stage2_mmu.h | 74 +++++++
lib/arm64/stage2_mmu.c | 402 +++++++++++++++++++++++++++++++++++++
3 files changed, 477 insertions(+)
create mode 100644 lib/arm64/asm/stage2_mmu.h
create mode 100644 lib/arm64/stage2_mmu.c
diff --git a/arm/Makefile.arm64 b/arm/Makefile.arm64
index a40c830d..5e50f5ba 100644
--- a/arm/Makefile.arm64
+++ b/arm/Makefile.arm64
@@ -40,6 +40,7 @@ cflatobjs += lib/arm64/stack.o
cflatobjs += lib/arm64/processor.o
cflatobjs += lib/arm64/spinlock.o
cflatobjs += lib/arm64/gic-v3-its.o lib/arm64/gic-v3-its-cmd.o
+cflatobjs += lib/arm64/stage2_mmu.o
ifeq ($(CONFIG_EFI),y)
cflatobjs += lib/acpi.o
diff --git a/lib/arm64/asm/stage2_mmu.h b/lib/arm64/asm/stage2_mmu.h
new file mode 100644
index 00000000..c9e931a8
--- /dev/null
+++ b/lib/arm64/asm/stage2_mmu.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2026, Google LLC.
+ * Author: Jing Zhang <jingzhangos@google.com>
+ *
+ * SPDX-License-Identifier: LGPL-2.0-or-later
+ */
+#ifndef _ASMARM64_STAGE2_MMU_H_
+#define _ASMARM64_STAGE2_MMU_H_
+
+#include <libcflat.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+
+#define pte_is_table(pte) (pte_val(pte) & PTE_TABLE_BIT)
+
+/* Stage-2 Memory Attributes (MemAttr[3:0]) */
+#define S2_MEMATTR_NORMAL (0xFUL << 2) /* Normal Memory, Outer/Inner Write-Back */
+#define S2_MEMATTR_DEVICE (0x0UL << 2) /* Device-nGnRnE */
+
+#define ESR_ELx_EC_SHIFT (26)
+#define ESR_ELx_EC_HVC64 UL(0x16)
+#define ESR_ELx_EC_DABT_LOW UL(0x24)
+
+/* Stage-2 Access Permissions (S2AP[1:0]) */
+#define S2AP_NONE (0UL << 6)
+#define S2AP_RO (1UL << 6) /* Read-only */
+#define S2AP_WO (2UL << 6) /* Write-only */
+#define S2AP_RW (3UL << 6) /* Read-Write */
+
+/* Flags for mapping */
+#define S2_MAP_RW (S2AP_RW | S2_MEMATTR_NORMAL | PTE_AF | PTE_SHARED)
+#define S2_MAP_DEVICE (S2AP_RW | S2_MEMATTR_DEVICE | PTE_AF)
+
+enum s2_granule {
+ S2_PAGE_4K,
+ S2_PAGE_16K,
+ S2_PAGE_64K,
+};
+
+/* Main Stage-2 MMU Structure */
+struct s2_mmu {
+ pgd_t *pgd;
+ int vmid;
+
+ /* Configuration */
+ enum s2_granule granule;
+ bool allow_block_mappings;
+
+ /* Internal helpers calculated from granule & VA_BITS */
+ unsigned int page_shift;
+ unsigned int level_shift;
+ int root_level; /* 0, 1, or 2 */
+ unsigned long page_size;
+ unsigned long block_size;
+};
+
+/* API */
+/* Initialize an s2_mmu struct with specific settings */
+struct s2_mmu *s2mmu_init(int vmid, enum s2_granule granule, bool allow_block_mappings);
+
+/* Management */
+void s2mmu_destroy(struct s2_mmu *mmu);
+void s2mmu_map(struct s2_mmu *mmu, unsigned long ipa, unsigned long pa,
+ unsigned long size, unsigned long flags);
+void s2mmu_unmap(struct s2_mmu *mmu, unsigned long ipa, unsigned long size);
+
+/* Activation */
+void s2mmu_enable(struct s2_mmu *mmu);
+void s2mmu_disable(struct s2_mmu *mmu);
+
+/* Debug */
+void s2mmu_print_fault_info(void);
+
+#endif /* _ASMARM64_STAGE2_MMU_H_ */
diff --git a/lib/arm64/stage2_mmu.c b/lib/arm64/stage2_mmu.c
new file mode 100644
index 00000000..bfe87eac
--- /dev/null
+++ b/lib/arm64/stage2_mmu.c
@@ -0,0 +1,402 @@
+/*
+ * Copyright (C) 2026, Google LLC.
+ * Author: Jing Zhang <jingzhangos@google.com>
+ *
+ * SPDX-License-Identifier: LGPL-2.0-or-later
+ */
+#include <libcflat.h>
+#include <alloc.h>
+#include <asm/stage2_mmu.h>
+#include <asm/sysreg.h>
+#include <asm/io.h>
+#include <asm/barrier.h>
+#include <alloc_page.h>
+
+/* VTCR_EL2 Definitions */
+#define VTCR_SH0_INNER (3UL << 12)
+#define VTCR_ORGN0_WBWA (1UL << 10)
+#define VTCR_IRGN0_WBWA (1UL << 8)
+
+/* TG0 Encodings */
+#define VTCR_TG0_4K (0UL << 14)
+#define VTCR_TG0_64K (1UL << 14)
+#define VTCR_TG0_16K (2UL << 14)
+
+/* Physical Address Size (PS) - Derive from VA_BITS for simplicity or max */
+#if VA_BITS > 40
+#define VTCR_PS_VAL (5UL << 16) /* 48-bit PA */
+#else
+#define VTCR_PS_VAL (2UL << 16) /* 40-bit PA */
+#endif
+
+/*
+ * Allocate and initialise a Stage-2 MMU context.
+ *
+ * @vmid:                 VMID later programmed into VTTBR_EL2 by s2mmu_enable().
+ * @granule:              Stage-2 translation granule (4K/16K/64K).
+ * @allow_block_mappings: permit block (huge) mappings in s2mmu_map().
+ *
+ * Returns the new context, or NULL on allocation failure.
+ */
+struct s2_mmu *s2mmu_init(int vmid, enum s2_granule granule, bool allow_block_mappings)
+{
+	struct s2_mmu *mmu = calloc(1, sizeof(struct s2_mmu));
+	int order = 0;
+
+	if (!mmu)
+		return NULL;
+
+	mmu->vmid = vmid;
+	mmu->granule = granule;
+	mmu->allow_block_mappings = allow_block_mappings;
+
+	/* Configure shifts based on granule */
+	switch (granule) {
+	case S2_PAGE_4K:
+		mmu->page_shift = 12;
+		mmu->level_shift = 9;
+		/*
+		 * Determine Root Level for 4K:
+		 * VA_BITS > 39 (e.g. 48)  -> Start L0
+		 * VA_BITS <= 39 (e.g. 32, 36) -> Start L1
+		 */
+		mmu->root_level = (VA_BITS > 39) ? 0 : 1;
+		break;
+	case S2_PAGE_16K:
+		mmu->page_shift = 14;
+		mmu->level_shift = 11;
+		/*
+		 * 16K: L1 covers 47 bits. L0 not valid for 16K.
+		 * Start L1 for 47 bits. Start L2 for 36 bits.
+		 */
+		mmu->root_level = (VA_BITS > 36) ? 1 : 2;
+		break;
+	case S2_PAGE_64K:
+		mmu->page_shift = 16;
+		mmu->level_shift = 13;
+		/* 64K: L1 covers 52 bits. L2 covers 42 bits. */
+		mmu->root_level = (VA_BITS > 42) ? 1 : 2;
+		break;
+	}
+
+	mmu->page_size = 1UL << mmu->page_shift;
+	mmu->block_size = 1UL << (mmu->page_shift + mmu->level_shift);
+
+	/*
+	 * Alloc PGD. When the Stage-2 granule is larger than the host
+	 * PAGE_SIZE, allocate enough contiguous host pages to hold one
+	 * Stage-2 table.
+	 */
+	if (mmu->page_size > PAGE_SIZE)
+		order = __builtin_ctz(mmu->page_size / PAGE_SIZE);
+
+	mmu->pgd = (pgd_t *)alloc_pages(order);
+	if (!mmu->pgd) {
+		free(mmu);
+		return NULL;
+	}
+	memset(mmu->pgd, 0, mmu->page_size);
+
+	return mmu;
+}
+
+/* Output-address mask of a table descriptor for this context's granule. */
+static unsigned long s2mmu_get_addr_mask(struct s2_mmu *mmu)
+{
+	if (mmu->granule == S2_PAGE_16K)
+		return GENMASK_ULL(47, 14);
+	if (mmu->granule == S2_PAGE_64K)
+		return GENMASK_ULL(47, 16);
+	return GENMASK_ULL(47, 12);	/* 4K */
+}
+
+/*
+ * Recursively free the page-table subtree rooted at @table (at @level).
+ * Only the table pages themselves are freed; the memory that block/page
+ * entries map is owned by the caller and left untouched.
+ */
+static void s2mmu_free_tables(struct s2_mmu *mmu, pte_t *table, int level)
+{
+	unsigned long entries = 1UL << mmu->level_shift;
+	unsigned long mask = s2mmu_get_addr_mask(mmu);
+	unsigned long i;
+
+	/*
+	 * Recurse if not leaf level
+	 * Level 3 is always leaf page. Levels 0-2 can be Table or Block.
+	 */
+	if (level < 3) {
+		for (i = 0; i < entries; i++) {
+			pte_t entry = table[i];
+			/* Only descend through table descriptors, not blocks */
+			if ((pte_valid(entry) && pte_is_table(entry))) {
+				pte_t *next = (pte_t *)phys_to_virt(pte_val(entry) & mask);
+				s2mmu_free_tables(mmu, next, level + 1);
+			}
+		}
+	}
+
+	/* Children are gone; release this table page itself */
+	free_pages(table);
+}
+
+/*
+ * Tear down a Stage-2 context: free the whole table tree and the
+ * context structure. Safe to call with NULL (s2mmu_init() may fail).
+ */
+void s2mmu_destroy(struct s2_mmu *mmu)
+{
+	if (!mmu)
+		return;
+	if (mmu->pgd)
+		s2mmu_free_tables(mmu, (pte_t *)mmu->pgd, mmu->root_level);
+	free(mmu);
+}
+
+/*
+ * Program VTCR_EL2/VTTBR_EL2 from @mmu and make its Stage-2 tables live.
+ *
+ * Selects the TG0 granule encoding and the SL0 start-level encoding to
+ * match the root level chosen in s2mmu_init(), sets T0SZ from VA_BITS,
+ * installs the PGD and VMID in VTTBR_EL2, then invalidates Stage-1+2
+ * TLB entries for the VMID.
+ *
+ * NOTE(review): the VMID is shifted into VTTBR_EL2[63:48]; values above
+ * 255 presumably require 16-bit VMID support -- confirm on the target.
+ */
+void s2mmu_enable(struct s2_mmu *mmu)
+{
+	unsigned long vtcr = VTCR_PS_VAL | VTCR_SH0_INNER |
+			VTCR_ORGN0_WBWA | VTCR_IRGN0_WBWA;
+	unsigned long t0sz = 64 - VA_BITS;
+	unsigned long vttbr;
+
+	switch (mmu->granule) {
+	case S2_PAGE_4K:
+		vtcr |= VTCR_TG0_4K;
+		/* SL0 Encodings for 4K: 0=L2, 1=L1, 2=L0 */
+		if (mmu->root_level == 0)
+			vtcr |= (2UL << 6); /* Start L0 */
+		else if (mmu->root_level == 1)
+			vtcr |= (1UL << 6); /* Start L1 */
+		else
+			vtcr |= (0UL << 6); /* Start L2 */
+		break;
+	case S2_PAGE_16K:
+		vtcr |= VTCR_TG0_16K;
+		/* SL0 Encodings for 16K: 0=L3(Res), 1=L2, 2=L1, 3=L0(Res) */
+		if (mmu->root_level == 1)
+			vtcr |= (2UL << 6); /* Start L1 */
+		else
+			vtcr |= (1UL << 6); /* Start L2 */
+		break;
+	case S2_PAGE_64K:
+		vtcr |= VTCR_TG0_64K;
+		/* SL0 Encodings for 64K: 0=L3(Res), 1=L2, 2=L1, 3=L0(Res) */
+		if (mmu->root_level == 1)
+			vtcr |= (2UL << 6); /* Start L1 */
+		else
+			vtcr |= (1UL << 6); /* Start L2 */
+		break;
+	}
+
+	/* T0SZ lives in VTCR_EL2[5:0] */
+	vtcr |= t0sz;
+
+	write_sysreg(vtcr, vtcr_el2);
+	isb();
+
+	/* Setup VTTBR: PGD base in the low bits, VMID in [63:48] */
+	vttbr = virt_to_phys(mmu->pgd);
+	vttbr |= ((unsigned long)mmu->vmid << 48);
+	write_sysreg(vttbr, vttbr_el2);
+	isb();
+
+	/* Drop any stale Stage-1+2 translations for this VMID */
+	asm volatile("tlbi vmalls12e1is");
+	dsb(ish);
+	isb();
+}
+
+/*
+ * Stop using @mmu's Stage-2 tables by clearing VTTBR_EL2.
+ * @mmu is currently unused; it is kept for API symmetry with
+ * s2mmu_enable().
+ */
+void s2mmu_disable(struct s2_mmu *mmu)
+{
+	write_sysreg(0, vttbr_el2);
+	isb();
+}
+
+/*
+ * Return the next-level table pointed to by table[idx], allocating it
+ * when @alloc is true and no entry exists yet.
+ *
+ * Returns NULL if the entry is a block mapping (cannot descend), if
+ * @alloc is false and the entry is empty, or on allocation failure.
+ */
+static pte_t *get_pte(struct s2_mmu *mmu, pte_t *table, unsigned long idx, bool alloc)
+{
+	unsigned long mask = s2mmu_get_addr_mask(mmu);
+	pte_t entry = table[idx];
+	pte_t *next_table;
+	int order = 0;
+
+	if (pte_valid(entry)) {
+		if (pte_is_table(entry))
+			return (pte_t *)phys_to_virt(pte_val(entry) & mask);
+		/* Block Entry */
+		return NULL;
+	}
+
+	if (!alloc)
+		return NULL;
+
+	/* Allocate table memory covering the Stage-2 Granule size */
+	if (mmu->page_size > PAGE_SIZE)
+		order = __builtin_ctz(mmu->page_size / PAGE_SIZE);
+
+	next_table = (pte_t *)alloc_pages(order);
+	if (!next_table)
+		return NULL;	/* don't install a bogus descriptor on OOM */
+	memset(next_table, 0, mmu->page_size);
+
+	pte_val(entry) = virt_to_phys(next_table) | PTE_TABLE_BIT | PTE_VALID;
+	WRITE_ONCE(table[idx], entry);
+
+	return next_table;
+}
+
+/*
+ * Map [ipa, ipa + size) to [pa, pa + size) in @mmu's Stage-2 tables,
+ * using the descriptor bits in @flags (e.g. S2_MAP_RW, S2_MAP_DEVICE).
+ *
+ * Uses block mappings where permitted and aligned, otherwise walks down
+ * to level-3 page entries, allocating intermediate tables on demand.
+ * NOTE(review): @ipa, @pa and @size are presumably expected to be
+ * granule-aligned -- unaligned tails are not handled; confirm callers.
+ */
+void s2mmu_map(struct s2_mmu *mmu, unsigned long ipa, unsigned long pa,
+	       unsigned long size, unsigned long flags)
+{
+	unsigned long level_mask, level_shift, level_size, level;
+	unsigned long start_ipa, end_ipa, idx;
+	pte_t entry, *table, *next_table;
+	bool is_block_level;
+
+	start_ipa = ipa;
+	end_ipa = ipa + size;
+	level_mask = (1UL << mmu->level_shift) - 1;
+
+	while (start_ipa < end_ipa) {
+		table = (pte_t *)mmu->pgd;
+
+		/* Walk from Root to Leaf */
+		for (level = mmu->root_level; level < 3; level++) {
+			level_shift = mmu->page_shift + (3 - level) * mmu->level_shift;
+			idx = (start_ipa >> level_shift) & level_mask;
+			level_size = 1UL << level_shift;
+
+			/*
+			 * Check for Block Mapping
+			 * Valid Block Levels:
+			 *   4K: L1 (1G), L2 (2MB)
+			 *   16K: L2 (32MB)
+			 *   64K: L2 (512MB)
+			 */
+			is_block_level = (level == 2) ||
+				(mmu->granule == S2_PAGE_4K && level == 1);
+
+			if (mmu->allow_block_mappings && is_block_level) {
+				/* IPA, PA and remaining size must all cover a full block */
+				if ((start_ipa & (level_size - 1)) == 0 &&
+				    (pa & (level_size - 1)) == 0 &&
+				    (start_ipa + level_size) <= end_ipa) {
+					/* Map Block */
+					pte_val(entry) = (pa & ~(level_size - 1)) |
+							 flags | PTE_VALID;
+					WRITE_ONCE(table[idx], entry);
+					start_ipa += level_size;
+					pa += level_size;
+					goto next_chunk; /* Continue outer loop */
+				}
+			}
+
+			/* Move to next level */
+			next_table = get_pte(mmu, table, idx, true);
+			if (!next_table) {
+				printf("Error allocating or existing block conflict.\n");
+				return;
+			}
+			table = next_table;
+		}
+
+		/* Leaf Level (Level 3 PTE) */
+		if (level == 3) {
+			idx = (start_ipa >> mmu->page_shift) & level_mask;
+			pte_val(entry) = (pa & ~(mmu->page_size - 1)) | flags | PTE_TYPE_PAGE;
+			WRITE_ONCE(table[idx], entry);
+			start_ipa += mmu->page_size;
+			pa += mmu->page_size;
+		}
+
+/* Re-enter the outer loop after a block mapping skipped the leaf walk */
+next_chunk:
+		continue;
+	}
+
+	/* Publish the new mappings: flush Stage-1+2 TLB entries */
+	asm volatile("tlbi vmalls12e1is");
+	dsb(ish);
+	isb();
+}
+
+/*
+ * Recursive helper to unmap a range within a specific table.
+ * Returns true if the table at this level is now completely empty
+ * and should be freed by the caller.
+ */
+static bool s2mmu_unmap_level(struct s2_mmu *mmu, pte_t *table,
+			      unsigned long current_ipa, int level,
+			      unsigned long start_ipa, unsigned long end_ipa,
+			      unsigned long mask)
+{
+	unsigned long level_size, entry_ipa, entry_end;
+	/* table_empty tracks whether every entry in this table is now invalid */
+	bool child_empty, table_empty = true;
+	pte_t entry, *next_table;
+	unsigned int level_shift;
+	unsigned long i;
+
+	/* Calculate shift and size for this level */
+	if (level == 3) {
+		level_shift = mmu->page_shift;
+	} else {
+		level_shift = mmu->page_shift + (3 - level) * mmu->level_shift;
+	}
+	level_size = 1UL << level_shift;
+
+	/* Iterate over all entries in this table */
+	for (i = 0; i < (1UL << mmu->level_shift); i++) {
+		entry = table[i];
+		entry_ipa = current_ipa + (i * level_size);
+		entry_end = entry_ipa + level_size;
+
+		/* Skip entries completely outside our target range */
+		if (entry_end <= start_ipa || entry_ipa >= end_ipa) {
+			if (pte_valid(entry))
+				table_empty = false;
+			continue;
+		}
+
+		/*
+		 * If the entry is fully covered by the unmap range,
+		 * we can clear it (leaf) or recurse and free (table).
+		 */
+		if (entry_ipa >= start_ipa && entry_end <= end_ipa) {
+			if (pte_valid(entry)) {
+				if (pte_is_table(entry) && level < 3) {
+					/* Recurse to free children first */
+					next_table = (pte_t *)phys_to_virt(pte_val(entry) & mask);
+					s2mmu_free_tables(mmu, next_table, level + 1);
+				}
+				/* Invalidate the entry */
+				WRITE_ONCE(table[i], __pte(0));
+			}
+			continue;
+		}
+
+		/*
+		 * Partial overlap: This must be a table (split required).
+		 * If it's a Block, we can't split easily in this context
+		 * without complex logic, so we generally skip or fail.
+		 * Assuming standard breakdown: recurse into the table.
+		 */
+		if (pte_valid(entry) && pte_is_table(entry) && level < 3) {
+			next_table = (pte_t *)phys_to_virt(pte_val(entry) & mask);
+			child_empty = s2mmu_unmap_level(mmu, next_table, entry_ipa, level + 1,
+							start_ipa, end_ipa, mask);
+
+			/* If the child table drained completely, reclaim it */
+			if (child_empty) {
+				free_pages(next_table);
+				WRITE_ONCE(table[i], __pte(0));
+			} else {
+				table_empty = false;
+			}
+		} else if (pte_valid(entry)) {
+			/*
+			 * Overlap on a leaf/block entry that extends
+			 * beyond the unmap range. We cannot simply clear it.
+			 * NOTE(review): the block stays fully mapped here --
+			 * callers must unmap block-aligned ranges.
+			 */
+			table_empty = false;
+		}
+	}
+
+	return table_empty;
+}
+
+/* Remove the Stage-2 mappings covering [ipa, ipa + size). */
+void s2mmu_unmap(struct s2_mmu *mmu, unsigned long ipa, unsigned long size)
+{
+	unsigned long pa_mask;
+
+	if (!mmu->pgd)
+		return;
+
+	pa_mask = s2mmu_get_addr_mask(mmu);
+
+	/*
+	 * Walk down from the root table. The root (PGD) itself is only
+	 * released by s2mmu_destroy(), so the emptiness result of the
+	 * recursion is intentionally dropped here.
+	 */
+	s2mmu_unmap_level(mmu, (pte_t *)mmu->pgd, 0, mmu->root_level,
+			  ipa, ipa + size, pa_mask);
+
+	/* Flush stale translations now that the tables are updated */
+	asm volatile("tlbi vmalls12e1is");
+	dsb(ish);
+	isb();
+}
+
+/* Dump the EL2 fault syndrome registers for debugging. */
+void s2mmu_print_fault_info(void)
+{
+	unsigned long syndrome = read_sysreg(esr_el2);
+	unsigned long fault_va = read_sysreg(far_el2);
+	unsigned long fault_ipa = read_sysreg(hpfar_el2);
+
+	printf("Stage-2 Fault Info: ESR=0x%lx FAR=0x%lx HPFAR=0x%lx\n",
+	       syndrome, fault_va, fault_ipa);
+}
--
2.53.0.851.ga537e3e6e9-goog
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [kvm-unit-tests PATCH v1 2/3] lib: arm64: Add bare-metal guest execution framework
2026-03-16 22:43 [kvm-unit-tests PATCH v1 0/3] arm64: Add Stage-2 MMU and Nested Guest Framework Jing Zhang
2026-03-16 22:43 ` [kvm-unit-tests PATCH v1 1/3] lib: arm64: Add stage2 page table management library Jing Zhang
@ 2026-03-16 22:43 ` Jing Zhang
2026-03-17 1:46 ` Yao Yuan
` (3 more replies)
2026-03-16 22:43 ` [kvm-unit-tests PATCH v1 3/3] arm64: Add Stage-2 MMU demand paging test Jing Zhang
2026-03-24 11:43 ` [kvm-unit-tests PATCH v1 0/3] arm64: Add Stage-2 MMU and Nested Guest Framework Joey Gouly
3 siblings, 4 replies; 10+ messages in thread
From: Jing Zhang @ 2026-03-16 22:43 UTC (permalink / raw)
To: KVM, KVMARM
Cc: Marc Zyngier, Joey Gouly, Andrew Jones, Alexandru Elisei,
Oliver Upton, Jing Zhang
To test advanced KVM features such as nested virtualization (NV) and
GICv4 direct interrupt injection, kvm-unit-tests needs the ability to
act as an L1 hypervisor running at EL2 and manage its own L2 guests.
Introduce a lightweight guest management library that provides the
infrastructure to create, configure, and execute nested guests.
This framework includes:
- Guest lifecycle management: `guest_create()` and `guest_destroy()`
APIs to allocate guest context and setup Stage-2 identity mappings
for code and stack using the s2mmu library.
- Context switching: The `guest_run()` assembly routine handles
saving the host (L1) callee-saved registers and loading the guest
(L2) GPRs and EL1 system registers.
- VM-Exit handling: Installs an EL2 trap handler (`guest_hyp_vectors`)
to intercept guest exits and route them to `guest_c_exception_handler`
to determine whether to return to the host test logic or resume.
- Guest-internal exceptions: Provides `guest_el1_vectors` to catch
Sync, IRQ, FIQ, and SError exceptions occurring entirely within the
guest (EL1) without trapping to the host.
Signed-off-by: Jing Zhang <jingzhangos@google.com>
---
arm/Makefile.arm64 | 2 +
lib/arm64/asm/guest.h | 156 ++++++++++++++++++++++++
lib/arm64/guest.c | 197 ++++++++++++++++++++++++++++++
lib/arm64/guest_arch.S | 263 +++++++++++++++++++++++++++++++++++++++++
4 files changed, 618 insertions(+)
create mode 100644 lib/arm64/asm/guest.h
create mode 100644 lib/arm64/guest.c
create mode 100644 lib/arm64/guest_arch.S
diff --git a/arm/Makefile.arm64 b/arm/Makefile.arm64
index 5e50f5ba..9026fd71 100644
--- a/arm/Makefile.arm64
+++ b/arm/Makefile.arm64
@@ -41,6 +41,8 @@ cflatobjs += lib/arm64/processor.o
cflatobjs += lib/arm64/spinlock.o
cflatobjs += lib/arm64/gic-v3-its.o lib/arm64/gic-v3-its-cmd.o
cflatobjs += lib/arm64/stage2_mmu.o
+cflatobjs += lib/arm64/guest.o
+cflatobjs += lib/arm64/guest_arch.o
ifeq ($(CONFIG_EFI),y)
cflatobjs += lib/acpi.o
diff --git a/lib/arm64/asm/guest.h b/lib/arm64/asm/guest.h
new file mode 100644
index 00000000..1d70873d
--- /dev/null
+++ b/lib/arm64/asm/guest.h
@@ -0,0 +1,156 @@
+/*
+ * Copyright (C) 2026, Google LLC.
+ * Author: Jing Zhang <jingzhangos@google.com>
+ *
+ * SPDX-License-Identifier: LGPL-2.0-or-later
+ */
+#ifndef _ASMARM64_GUEST_H_
+#define _ASMARM64_GUEST_H_
+
+/* Offsets for assembly (Must match struct guest) */
+#define GUEST_X_OFFSET 0
+#define GUEST_ELR_OFFSET 248
+#define GUEST_SPSR_OFFSET 256
+#define GUEST_HCR_OFFSET 264
+#define GUEST_VTTBR_OFFSET 272
+#define GUEST_SCTLR_OFFSET 280
+#define GUEST_VBAR_OFFSET 288
+#define GUEST_SP_EL1_OFFSET 296
+#define GUEST_ESR_OFFSET 304
+#define GUEST_FAR_OFFSET 312
+#define GUEST_HPFAR_OFFSET 320
+#define GUEST_EXIT_CODE_OFFSET 328
+#define GUEST_TPIDR_EL1_OFFSET 336
+#define GUEST_ICH_VMCR_EL2_OFFSET 344
+
+#ifndef __ASSEMBLY__
+
+#include <libcflat.h>
+#include <asm/stage2_mmu.h>
+
+/* HCR_EL2 Definitions */
+#define HCR_VM (1UL << 0) /* Virtualization Enable */
+#define HCR_FMO (1UL << 3) /* Physical FIQ Routing */
+#define HCR_IMO (1UL << 4) /* Physical IRQ Routing */
+#define HCR_AMO (1UL << 5) /* Physical SError Interrupt Routing */
+#define HCR_RW (1UL << 31) /* Execution State: AArch64 */
+#define HCR_DC (1UL << 12) /* Default Cacheable */
+#define HCR_E2H (1UL << 34) /* EL2 Host */
+
+#define HCR_GUEST_FLAGS (HCR_VM | HCR_FMO | HCR_IMO | HCR_AMO | HCR_RW | \
+ HCR_DC | HCR_E2H)
+
+/* ICH_VMCR_EL2 bit definition */
+#define ICH_VMCR_PMR_SHIFT 24
+#define ICH_VMCR_PMR_MASK (0xffUL << ICH_VMCR_PMR_SHIFT)
+#define ICH_VMCR_ENG0_SHIFT 0
+#define ICH_VMCR_ENG0_MASK (1 << ICH_VMCR_ENG0_SHIFT)
+#define ICH_VMCR_ENG1_SHIFT 1
+#define ICH_VMCR_ENG1_MASK (1 << ICH_VMCR_ENG1_SHIFT)
+
+/* Guest stack size */
+#define GUEST_STACK_SIZE SZ_64K
+
+/*
+ * Result from Handler:
+ * RESUME: Keep guest running (ERET immediately)
+ * EXIT: Return to Host C caller
+ */
+enum guest_handler_result {
+ GUEST_ACTION_RESUME,
+ GUEST_ACTION_EXIT
+};
+
+struct guest;
+typedef enum guest_handler_result (*guest_handler_t)(struct guest *guest);
+
+/* EL1 (Guest-internal) Exception Vector */
+enum guest_el1_vector {
+ GUEST_EL1_SYNC,
+ GUEST_EL1_IRQ,
+ GUEST_EL1_FIQ,
+ GUEST_EL1_SERROR,
+ GUEST_EL1_MAX
+};
+
+/*
+ * Guest EL1 Exception Frame (pushed to guest stack by asm stub)
+ * We use a simplified frame: x0-x30, elr, spsr. size = 33*8
+ */
+struct guest_el1_regs {
+ unsigned long regs[31];
+ unsigned long elr;
+ unsigned long spsr;
+};
+
+typedef void (*guest_el1_handler_t)(struct guest_el1_regs *regs, unsigned int esr);
+
+/* Exceptions from the Guest (Lower EL using AArch64) */
+enum guest_vector {
+ GUEST_VECTOR_SYNC,
+ GUEST_VECTOR_IRQ,
+ GUEST_VECTOR_FIQ,
+ GUEST_VECTOR_SERROR,
+ GUEST_VECTOR_MAX
+};
+
+/*
+ * Guest Context Structure
+ * This will be pointed to by TPIDR_EL1 while the guest is running.
+ */
+struct guest_context {
+ guest_el1_handler_t handlers[GUEST_EL1_MAX];
+};
+
+struct guest {
+ /* 0x000: General Purpose Registers */
+ unsigned long x[31]; /* x0..x30 */
+
+ /* 0x0F8: Execution State */
+ unsigned long elr_el2;
+ unsigned long spsr_el2;
+
+ /* 0x108: Control Registers */
+ unsigned long hcr_el2;
+ unsigned long vttbr_el2;
+ unsigned long sctlr_el1;
+ unsigned long vbar_el1;
+ unsigned long sp_el1;
+
+ /* 0x130: Exit Information */
+ unsigned long esr_el2;
+ unsigned long far_el2;
+ unsigned long hpfar_el2;
+ unsigned long exit_code; /* enum guest_vector */
+ unsigned long tpidr_el1;
+
+ /* 0x158: GIC Registers */
+ unsigned long ich_vmcr_el2;
+
+ /* 0x160: Exception Handlers */
+ guest_handler_t handlers[GUEST_VECTOR_MAX];
+ struct guest_context *guest_context;
+
+ struct s2_mmu *s2mmu;
+};
+
+/* API */
+struct guest *guest_create(int vmid, void (*guest_func)(void), enum s2_granule granule);
+void guest_destroy(struct guest *guest);
+
+/* Configuration */
+void guest_set_vector(struct guest *guest, void *vector_table);
+void guest_set_stack(struct guest *guest, void *stack_top);
+void guest_install_handler(struct guest *guest, enum guest_vector v, guest_handler_t handler);
+
+/* Install handler for exceptions INSIDE EL1 */
+void guest_install_el1_handler(struct guest *guest, enum guest_el1_vector v, guest_el1_handler_t handler);
+
+unsigned long guest_c_exception_handler(struct guest *guest, unsigned long vector_offset);
+void guest_el1_c_handler(struct guest_el1_regs *regs, unsigned int vector);
+
+/* Core Run Loop */
+void guest_run(struct guest *guest);
+
+#endif /* __ASSEMBLY__ */
+#endif /* _ASMARM64_GUEST_H_ */
diff --git a/lib/arm64/guest.c b/lib/arm64/guest.c
new file mode 100644
index 00000000..6c256c11
--- /dev/null
+++ b/lib/arm64/guest.c
@@ -0,0 +1,197 @@
+/*
+ * Copyright (C) 2026, Google LLC.
+ * Author: Jing Zhang <jingzhangos@google.com>
+ *
+ * SPDX-License-Identifier: LGPL-2.0-or-later
+ */
+#include <libcflat.h>
+#include <asm/guest.h>
+#include <asm/io.h>
+#include <asm/sysreg.h>
+#include <asm/barrier.h>
+#include <alloc_page.h>
+#include <alloc.h>
+
+/* Compile-time checks to ensure Assembly macros match C Struct */
+_Static_assert(offsetof(struct guest, x) == GUEST_X_OFFSET,
+ "GUEST_X_OFFSET mismatch");
+_Static_assert(offsetof(struct guest, elr_el2) == GUEST_ELR_OFFSET,
+ "GUEST_ELR_OFFSET mismatch");
+_Static_assert(offsetof(struct guest, spsr_el2) == GUEST_SPSR_OFFSET,
+ "GUEST_SPSR_OFFSET mismatch");
+_Static_assert(offsetof(struct guest, hcr_el2) == GUEST_HCR_OFFSET,
+ "GUEST_HCR_OFFSET mismatch");
+_Static_assert(offsetof(struct guest, vttbr_el2) == GUEST_VTTBR_OFFSET,
+ "GUEST_VTTBR_OFFSET mismatch");
+_Static_assert(offsetof(struct guest, sctlr_el1) == GUEST_SCTLR_OFFSET,
+ "GUEST_SCTLR_OFFSET mismatch");
+_Static_assert(offsetof(struct guest, vbar_el1) == GUEST_VBAR_OFFSET,
+ "GUEST_VBAR_OFFSET mismatch");
+_Static_assert(offsetof(struct guest, sp_el1) == GUEST_SP_EL1_OFFSET,
+ "GUEST_SP_EL1_OFFSET mismatch");
+_Static_assert(offsetof(struct guest, esr_el2) == GUEST_ESR_OFFSET,
+ "GUEST_ESR_OFFSET mismatch");
+_Static_assert(offsetof(struct guest, far_el2) == GUEST_FAR_OFFSET,
+ "GUEST_FAR_OFFSET mismatch");
+_Static_assert(offsetof(struct guest, hpfar_el2) == GUEST_HPFAR_OFFSET,
+ "GUEST_HPFAR_OFFSET mismatch");
+_Static_assert(offsetof(struct guest, exit_code) == GUEST_EXIT_CODE_OFFSET,
+ "GUEST_EXIT_CODE_OFFSET mismatch");
+_Static_assert(offsetof(struct guest, tpidr_el1) == GUEST_TPIDR_EL1_OFFSET,
+ "GUEST_TPIDR_EL1_OFFSET mismatch");
+_Static_assert(offsetof(struct guest, ich_vmcr_el2) == GUEST_ICH_VMCR_EL2_OFFSET,
+ "GUEST_ICH_VMCR_EL2_OFFSET mismatch");
+
+/*
+ * C-Entry for Exception Handling
+ * Returns 0 to Resume Guest, 1 to Exit to Host Caller
+ */
+/*
+ * C-Entry for Exception Handling
+ * Returns 0 to Resume Guest, 1 to Exit to Host Caller.
+ *
+ * @guest->exit_code is written by the assembly stub; bound-check it
+ * before indexing the handler table so a corrupted value cannot cause
+ * an out-of-bounds read.
+ */
+unsigned long guest_c_exception_handler(struct guest *guest, unsigned long vector_offset)
+{
+	enum guest_vector vector = (enum guest_vector)guest->exit_code;
+
+	/* Save Trap Info */
+	guest->esr_el2 = read_sysreg(esr_el2);
+	guest->far_el2 = read_sysreg(far_el2);
+	guest->hpfar_el2 = read_sysreg(hpfar_el2);
+
+	/* Invoke Handler if registered */
+	if (vector < GUEST_VECTOR_MAX && guest->handlers[vector]) {
+		if (guest->handlers[vector](guest) == GUEST_ACTION_RESUME) {
+			return 0; /* ASM stub will restore and ERET */
+		}
+	}
+
+	/* Default: Exit to caller */
+	return 1;
+}
+
+/* --- EL1 (Guest-Internal) Vector Handling --- */
+
+/* Register a handler for exceptions taken entirely inside the guest's EL1. */
+void guest_install_el1_handler(struct guest *guest, enum guest_el1_vector v, guest_el1_handler_t handler)
+{
+	if (!guest || !guest->guest_context)
+		return;
+	if (v >= GUEST_EL1_MAX)
+		return;
+	guest->guest_context->handlers[v] = handler;
+}
+
+/*
+ * Common C handler for guest-internal (EL1) exceptions. Looks up the
+ * per-guest context via TPIDR_EL1 and dispatches to the registered
+ * handler; unhandled vectors escalate to the host via HVC.
+ */
+void guest_el1_c_handler(struct guest_el1_regs *regs, unsigned int vector)
+{
+	struct guest_context *ctx = (struct guest_context *)read_sysreg(tpidr_el1);
+	unsigned int esr = read_sysreg(esr_el1);
+	guest_el1_handler_t fn = NULL;
+
+	if (ctx && vector < GUEST_EL1_MAX)
+		fn = ctx->handlers[vector];
+
+	if (fn) {
+		fn(regs, esr);
+		return;
+	}
+
+	printf("Guest: Unhandled Exception Vector %d, ESR=0x%x\n", vector, esr);
+	/* Escalate to the host hypervisor via a reserved HVC immediate */
+	asm volatile("hvc #0xFFFF");
+}
+
+extern void guest_el1_vectors(void);
+
+/*
+ * Allocate and initialise a struct guest bound to @s2_ctx, entering at
+ * @entry_point in EL1h with interrupts masked.
+ *
+ * Returns NULL on allocation failure.
+ */
+static struct guest *__guest_create(struct s2_mmu *s2_ctx, void *entry_point)
+{
+	struct guest *guest = calloc(1, sizeof(struct guest));
+	struct guest_context *guest_ctx;
+	unsigned long guest_ctx_pa;
+
+	if (!guest)
+		return NULL;
+
+	/* Allocate the internal context table */
+	guest_ctx = (void *)alloc_page();
+	if (!guest_ctx) {
+		free(guest);
+		return NULL;
+	}
+	memset(guest_ctx, 0, PAGE_SIZE);
+	guest->guest_context = guest_ctx;
+
+	/* Identity-map the context page so the guest can reach it via TPIDR_EL1 */
+	guest_ctx_pa = virt_to_phys(guest_ctx);
+	if (s2_ctx)
+		s2mmu_map(s2_ctx, guest_ctx_pa, guest_ctx_pa, PAGE_SIZE, S2_MAP_RW);
+
+	guest->tpidr_el1 = guest_ctx_pa;
+
+	guest->elr_el2 = (unsigned long)entry_point;
+	guest->spsr_el2 = 0x3C5; /* M=EL1h, DAIF=Masked */
+	guest->hcr_el2 = HCR_GUEST_FLAGS;
+
+	if (s2_ctx) {
+		guest->vttbr_el2 = virt_to_phys(s2_ctx->pgd);
+		guest->vttbr_el2 |= ((unsigned long)s2_ctx->vmid << 48);
+	}
+
+	/* Start the guest with the host's SCTLR plus caches and MMU on */
+	guest->sctlr_el1 = read_sysreg(sctlr_el1);
+	guest->sctlr_el1 |= SCTLR_EL1_C | SCTLR_EL1_I | SCTLR_EL1_M;
+
+	/* Unmask the virtual CPU interface: PMR wide open, Group 1 enabled */
+	guest->ich_vmcr_el2 = read_sysreg(ich_vmcr_el2);
+	guest->ich_vmcr_el2 |= (0xFFUL << ICH_VMCR_PMR_SHIFT) | (1UL << ICH_VMCR_ENG1_SHIFT);
+
+	guest->vbar_el1 = (unsigned long)guest_el1_vectors;
+	guest->s2mmu = s2_ctx;
+
+	return guest;
+}
+
+/*
+ * Create a guest running @guest_func at EL1 behind a fresh Stage-2
+ * context (@vmid, @granule). Code, stack, guest context and the UART
+ * are identity-mapped (IPA == PA). Returns NULL on failure.
+ */
+struct guest *guest_create(int vmid, void (*guest_func)(void), enum s2_granule granule)
+{
+	unsigned long guest_pa, code_base, stack_pa;
+	unsigned long *stack_page;
+	struct guest *guest;
+	struct s2_mmu *ctx;
+
+	ctx = s2mmu_init(vmid, granule, true);
+	if (!ctx)
+		return NULL;
+
+	/*
+	 * Map the Host's code segment Identity Mapped (IPA=PA).
+	 * To be safe, we map a large chunk (e.g., 2MB) around the function
+	 * to capture any helper functions the compiler might generate calls to.
+	 */
+	guest_pa = virt_to_phys((void *)guest_func);
+	code_base = guest_pa & ~(SZ_2M - 1);
+	s2mmu_map(ctx, code_base, code_base, SZ_2M, S2_MAP_RW);
+
+	/*
+	 * Map Stack
+	 * Allocate 16 pages (64K) in Host, get its PA, and map it for Guest.
+	 */
+	stack_page = alloc_pages(get_order(GUEST_STACK_SIZE >> PAGE_SHIFT));
+	if (!stack_page) {
+		s2mmu_destroy(ctx);
+		return NULL;
+	}
+	stack_pa = virt_to_phys(stack_page);
+	/* Identity Map it (IPA = PA) */
+	s2mmu_map(ctx, stack_pa, stack_pa, GUEST_STACK_SIZE, S2_MAP_RW);
+
+	s2mmu_enable(ctx);
+
+	/* Create Guest */
+	/* Entry point is the PA of the function (Identity Mapped) */
+	guest = __guest_create(ctx, (void *)guest_pa);
+	if (!guest) {
+		s2mmu_disable(ctx);
+		free_pages(stack_page);
+		s2mmu_destroy(ctx);
+		return NULL;
+	}
+
+	/*
+	 * Setup Guest Stack Pointer
+	 * Must match where we mapped the stack + Offset
+	 */
+	guest_set_stack(guest, (void *)(stack_pa + GUEST_STACK_SIZE));
+
+	/*
+	 * Map UART identity mapped, printf() available to guest.
+	 * NOTE(review): 0x09000000 is the QEMU virt machine's PL011 base --
+	 * confirm or parameterize for other platforms.
+	 */
+	s2mmu_map(ctx, 0x09000000, 0x09000000, PAGE_SIZE, S2_MAP_DEVICE);
+
+	return guest;
+}
+
+/*
+ * Tear down a guest: disable and free its Stage-2 context, the guest
+ * context page, and the struct itself. Safe to call with NULL.
+ */
+void guest_destroy(struct guest *guest)
+{
+	if (!guest)
+		return;
+	if (guest->s2mmu) {
+		s2mmu_disable(guest->s2mmu);
+		s2mmu_destroy(guest->s2mmu);
+	}
+	if (guest->guest_context)
+		free_page(guest->guest_context);
+	free(guest);
+}
+
+/* Override the guest's EL1 vector table; loaded into VBAR_EL1 on entry. */
+void guest_set_vector(struct guest *guest, void *vector_table)
+{
+	guest->vbar_el1 = (unsigned long)vector_table;
+}
+
+/* Set the guest's initial SP_EL1; @stack_top is the highest address. */
+void guest_set_stack(struct guest *guest, void *stack_top)
+{
+	guest->sp_el1 = (unsigned long)stack_top;
+}
+
+/* Register a host-side handler for guest exits; bad vectors are ignored. */
+void guest_install_handler(struct guest *guest, enum guest_vector v, guest_handler_t handler)
+{
+	if (v >= GUEST_VECTOR_MAX)
+		return;
+	guest->handlers[v] = handler;
+}
diff --git a/lib/arm64/guest_arch.S b/lib/arm64/guest_arch.S
new file mode 100644
index 00000000..cb7074d7
--- /dev/null
+++ b/lib/arm64/guest_arch.S
@@ -0,0 +1,263 @@
+/*
+ * Copyright (C) 2026, Google LLC.
+ * Author: Jing Zhang <jingzhangos@google.com>
+ *
+ * SPDX-License-Identifier: LGPL-2.0-or-later
+ */
+#define __ASSEMBLY__
+#include <asm/guest.h>
+
+/*
+ * guest_run - world switch into the guest and back.
+ *
+ * x0 = struct guest pointer (layout matches the GUEST_*_OFFSET macros,
+ * enforced by the _Static_asserts in guest.c).
+ *
+ * Returns -- via the EXIT path in guest_common_exit -- once
+ * guest_c_exception_handler() requests an exit to the host.
+ */
+.global guest_run
+guest_run:
+ /* x0 = struct guest pointer */
+
+ /* Save Host Callee-Saved Regs */
+ stp x29, x30, [sp, #-16]!
+ stp x27, x28, [sp, #-16]!
+ stp x25, x26, [sp, #-16]!
+ stp x23, x24, [sp, #-16]!
+ stp x21, x22, [sp, #-16]!
+ stp x19, x20, [sp, #-16]!
+
+ /* Cache Guest Pointer in TPIDR_EL2 */
+ /* The EL2 exit stubs recover the struct from TPIDR_EL2. */
+ msr tpidr_el2, x0
+
+ /* Configure ICC_SRE_EL2 to allow EL1 access to SysRegs */
+ /* Bit 3 (Enable) = 1, Bit 0 (SRE) = 1 */
+ mrs x1, icc_sre_el2
+ orr x1, x1, #1
+ orr x1, x1, #(1 << 3)
+ msr icc_sre_el2, x1
+ isb
+
+ /* Enable virtual CPU interface */
+ /* ICH_HCR_EL2.En is bit 0 */
+ mrs x1, ich_hcr_el2
+ orr x1, x1, #1
+ msr ich_hcr_el2, x1
+
+ /* Load Guest System Registers */
+ ldr x1, [x0, #GUEST_ELR_OFFSET]
+ msr elr_el2, x1
+ ldr x1, [x0, #GUEST_SPSR_OFFSET]
+ msr spsr_el2, x1
+ ldr x1, [x0, #GUEST_HCR_OFFSET]
+ msr hcr_el2, x1
+ ldr x1, [x0, #GUEST_VTTBR_OFFSET]
+ msr vttbr_el2, x1
+ /*
+ * S3_5_c1_c0_0 / S3_5_c12_c0_0 are the SCTLR_EL12 / VBAR_EL12
+ * encodings: with HCR_EL2.E2H set (HCR_GUEST_FLAGS includes HCR_E2H)
+ * these reach the guest's EL1 registers rather than the
+ * E2H-redirected host copies.
+ */
+ ldr x1, [x0, #GUEST_SCTLR_OFFSET]
+ msr S3_5_c1_c0_0, x1
+ ldr x1, [x0, #GUEST_VBAR_OFFSET]
+ msr S3_5_c12_c0_0, x1
+ ldr x1, [x0, #GUEST_SP_EL1_OFFSET]
+ msr sp_el1, x1
+ ldr x1, [x0, #GUEST_TPIDR_EL1_OFFSET]
+ msr tpidr_el1, x1
+ ldr x1, [x0, #GUEST_ICH_VMCR_EL2_OFFSET]
+ msr ich_vmcr_el2, x1
+
+ /* Load Guest GPRs */
+ /* Immediates are byte offsets into guest->x[]: x[n] lives at 8*n. */
+ ldp x1, x2, [x0, #8]
+ ldp x3, x4, [x0, #24]
+ ldp x5, x6, [x0, #40]
+ ldp x7, x8, [x0, #56]
+ ldp x9, x10, [x0, #72]
+ ldp x11, x12, [x0, #88]
+ ldp x13, x14, [x0, #104]
+ ldp x15, x16, [x0, #120]
+ ldp x17, x18, [x0, #136]
+ ldp x19, x20, [x0, #152]
+ ldp x21, x22, [x0, #168]
+ ldp x23, x24, [x0, #184]
+ ldp x25, x26, [x0, #200]
+ ldp x27, x28, [x0, #216]
+ ldp x29, x30, [x0, #232]
+ ldr x0, [x0, #0]
+
+ /* Install Trap Handler */
+ /*
+ * x29 is the only register free to scratch at this point: x0..x28 and
+ * x30 already hold guest values.  It is reloaded from the struct below.
+ */
+ adrp x29, guest_hyp_vectors
+ add x29, x29, :lo12:guest_hyp_vectors
+ msr vbar_el2, x29
+
+ /* Restore x29 from struct (via tpidr_el2) */
+ mrs x29, tpidr_el2
+ ldr x29, [x29, #232]
+
+ isb
+ eret
+
+ /*
+ * EL2 vector table.  VBAR_EL2 requires 2KB alignment; the first 0x400
+ * bytes (the Current-EL entries) are left zeroed, so an exception
+ * taken at EL2 itself while this table is live would execute zeros.
+ * Offset 0x400 is "Sync from lower EL, AArch64"; 0x480 is its IRQ.
+ */
+ .align 11
+guest_hyp_vectors:
+ .skip 0x400
+
+guest_exit_sync:
+ stp x0, x1, [sp, #-16]!
+ mrs x0, tpidr_el2
+ mov x1, #0 /* GUEST_VECTOR_SYNC */
+ str x1, [x0, #GUEST_EXIT_CODE_OFFSET]
+ b guest_common_exit
+
+ .balign 0x80
+
+guest_exit_irq:
+ stp x0, x1, [sp, #-16]!
+ mrs x0, tpidr_el2
+ mov x1, #1 /* GUEST_VECTOR_IRQ */
+ str x1, [x0, #GUEST_EXIT_CODE_OFFSET]
+ b guest_common_exit
+
+/*
+ * NOTE(review): there are no stubs at +0x500 (FIQ) or +0x580 (SError);
+ * such an exit would fall straight into guest_common_exit with the
+ * guest's x0/x1 never parked on the stack -- confirm those exits cannot
+ * occur, or add stubs.
+ *
+ * On entry here: x0 = struct guest (from TPIDR_EL2), guest x0/x1 are on
+ * the EL2 stack, all remaining GPRs still hold guest values.
+ */
+guest_common_exit:
+ stp x2, x3, [x0, #16]
+ stp x4, x5, [x0, #32]
+ stp x6, x7, [x0, #48]
+ stp x8, x9, [x0, #64]
+ stp x10, x11, [x0, #80]
+ stp x12, x13, [x0, #96]
+ stp x14, x15, [x0, #112]
+ stp x16, x17, [x0, #128]
+ stp x18, x19, [x0, #144]
+ stp x20, x21, [x0, #160]
+ stp x22, x23, [x0, #176]
+ stp x24, x25, [x0, #192]
+ stp x26, x27, [x0, #208]
+ stp x28, x29, [x0, #224]
+ str x30, [x0, #240]
+
+ /* Pop the guest's x0/x1 saved by the entry stub into guest->x[0..1]. */
+ ldp x2, x3, [sp], #16
+ stp x2, x3, [x0, #0]
+
+ mrs x1, elr_el2
+ str x1, [x0, #GUEST_ELR_OFFSET]
+ mrs x1, spsr_el2
+ str x1, [x0, #GUEST_SPSR_OFFSET]
+ mrs x1, esr_el2
+ str x1, [x0, #GUEST_ESR_OFFSET]
+ mrs x1, far_el2
+ str x1, [x0, #GUEST_FAR_OFFSET]
+ mrs x1, hpfar_el2
+ str x1, [x0, #GUEST_HPFAR_OFFSET]
+ mrs x1, sp_el1
+ str x1, [x0, #GUEST_SP_EL1_OFFSET]
+ mrs x1, ich_vmcr_el2
+ str x1, [x0, #GUEST_ICH_VMCR_EL2_OFFSET]
+
+ /*
+ * NOTE(review): x29 here still holds the *guest's* x29 (stored above),
+ * not a vector offset -- the entry stubs never set it.  Harmless today
+ * because guest_c_exception_handler() ignores its second argument and
+ * keys off guest->exit_code instead, but the argument is garbage.
+ */
+ mov x1, x29
+ bl guest_c_exception_handler
+ cbz x0, guest_resume_guest
+
+ /* EXIT */
+ /* Restore Host Callee-Saved Regs */
+ ldp x19, x20, [sp], #16
+ ldp x21, x22, [sp], #16
+ ldp x23, x24, [sp], #16
+ ldp x25, x26, [sp], #16
+ ldp x27, x28, [sp], #16
+ ldp x29, x30, [sp], #16
+ ret
+
+ /* RESUME */
+guest_resume_guest:
+ /* Re-enter the guest: reload state the C handler may have changed. */
+ mrs x0, tpidr_el2
+ ldr x1, [x0, #GUEST_ELR_OFFSET]
+ msr elr_el2, x1
+ ldr x1, [x0, #GUEST_SPSR_OFFSET]
+ msr spsr_el2, x1
+ ldr x1, [x0, #GUEST_SP_EL1_OFFSET]
+ msr sp_el1, x1
+
+ ldp x1, x2, [x0, #8]
+ ldp x3, x4, [x0, #24]
+ ldp x5, x6, [x0, #40]
+ ldp x7, x8, [x0, #56]
+ ldp x9, x10, [x0, #72]
+ ldp x11, x12, [x0, #88]
+ ldp x13, x14, [x0, #104]
+ ldp x15, x16, [x0, #120]
+ ldp x17, x18, [x0, #136]
+ ldp x19, x20, [x0, #152]
+ ldp x21, x22, [x0, #168]
+ ldp x23, x24, [x0, #184]
+ ldp x25, x26, [x0, #200]
+ ldp x27, x28, [x0, #216]
+ ldp x29, x30, [x0, #232]
+ ldr x0, [x0, #0]
+ eret
+
+/* EL1 Vector Table */
+/*
+ * Guest-internal (EL1) vectors.  The guest runs on SP_EL1 (EL1h), so
+ * its own exceptions use the 0x200-0x380 "Current EL with SPx" entries.
+ * Each stub is 12 bytes (stp + mov + b); the .skip pads to the next
+ * 0x80-aligned entry.  The stub parks x29/x30 on the stack and passes
+ * the enum guest_el1_vector index in x29.
+ */
+.align 11
+.global guest_el1_vectors
+guest_el1_vectors:
+ /* 0x000-0x1ff: Current EL with SP0 entries (unused, left zeroed) */
+ .skip 0x200
+ /* Sync (0x200) */
+ stp x29, x30, [sp, #-16]!
+ mov x29, #0
+ b guest_el1_common
+ .skip 0x80 - 12
+ /* IRQ (0x280) */
+ stp x29, x30, [sp, #-16]!
+ mov x29, #1
+ b guest_el1_common
+ .skip 0x80 - 12
+ /* FIQ (0x300) */
+ stp x29, x30, [sp, #-16]!
+ mov x29, #2
+ b guest_el1_common
+ .skip 0x80 - 12
+ /* SError (0x380) */
+ stp x29, x30, [sp, #-16]!
+ mov x29, #3
+ b guest_el1_common
+ /*
+ * NOTE(review): the SError stub is not padded to 0x80 before this
+ * skip, so the table ends short of the 0x800-byte layout and the
+ * lower-EL entries are absent.  Benign only while nothing runs at
+ * EL0 under this guest -- confirm.
+ */
+ .skip 0x400
+
+/*
+ * Common EL1 exception path: builds a 264-byte frame matching
+ * struct guest_el1_regs (regs[31] + elr + spsr) and calls
+ * guest_el1_c_handler(frame, vector).
+ *
+ * NOTE(review): the frame layout does not quite match the struct:
+ * "stp x28, x30, [sp, #224]" puts x30 into regs[29], regs[30]
+ * (offset 240) is never written, and x29 (parked with x30 by the
+ * entry stub, above this frame) never lands in the frame at all.
+ * Handlers reading regs[29]/regs[30] will see x30/garbage -- confirm
+ * or fix the store layout.
+ */
+guest_el1_common:
+ sub sp, sp, #264
+ stp x0, x1, [sp, #0]
+ stp x2, x3, [sp, #16]
+ stp x4, x5, [sp, #32]
+ stp x6, x7, [sp, #48]
+ stp x8, x9, [sp, #64]
+ stp x10, x11, [sp, #80]
+ stp x12, x13, [sp, #96]
+ stp x14, x15, [sp, #112]
+ stp x16, x17, [sp, #128]
+ stp x18, x19, [sp, #144]
+ stp x20, x21, [sp, #160]
+ stp x22, x23, [sp, #176]
+ stp x24, x25, [sp, #192]
+ stp x26, x27, [sp, #208]
+ stp x28, x30, [sp, #224]
+
+ /* elr/spsr land at frame offsets 248/256 (guest_el1_regs.elr/.spsr) */
+ mrs x0, elr_el1
+ str x0, [sp, #248]
+ mrs x0, spsr_el1
+ str x0, [sp, #256]
+
+ /* guest_el1_c_handler(regs = frame, vector = stub index in x29) */
+ mov x0, sp
+ mov x1, x29
+ bl guest_el1_c_handler
+
+ /* The handler may have rewritten elr/spsr (e.g. to skip an insn). */
+ ldr x0, [sp, #248]
+ msr elr_el1, x0
+ ldr x0, [sp, #256]
+ msr spsr_el1, x0
+
+ ldp x0, x1, [sp, #0]
+ ldp x2, x3, [sp, #16]
+ ldp x4, x5, [sp, #32]
+ ldp x6, x7, [sp, #48]
+ ldp x8, x9, [sp, #64]
+ ldp x10, x11, [sp, #80]
+ ldp x12, x13, [sp, #96]
+ ldp x14, x15, [sp, #112]
+ ldp x16, x17, [sp, #128]
+ ldp x18, x19, [sp, #144]
+ ldp x20, x21, [sp, #160]
+ ldp x22, x23, [sp, #176]
+ ldp x24, x25, [sp, #192]
+ ldp x26, x27, [sp, #208]
+ ldp x28, x30, [sp, #224]
+
+ add sp, sp, #264
+ ldp x29, x30, [sp], #16
+ eret
--
2.53.0.851.ga537e3e6e9-goog
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [kvm-unit-tests PATCH v1 3/3] arm64: Add Stage-2 MMU demand paging test
2026-03-16 22:43 [kvm-unit-tests PATCH v1 0/3] arm64: Add Stage-2 MMU and Nested Guest Framework Jing Zhang
2026-03-16 22:43 ` [kvm-unit-tests PATCH v1 1/3] lib: arm64: Add stage2 page table management library Jing Zhang
2026-03-16 22:43 ` [kvm-unit-tests PATCH v1 2/3] lib: arm64: Add bare-metal guest execution framework Jing Zhang
@ 2026-03-16 22:43 ` Jing Zhang
2026-03-24 11:43 ` [kvm-unit-tests PATCH v1 0/3] arm64: Add Stage-2 MMU and Nested Guest Framework Joey Gouly
3 siblings, 0 replies; 10+ messages in thread
From: Jing Zhang @ 2026-03-16 22:43 UTC (permalink / raw)
To: KVM, KVMARM
Cc: Marc Zyngier, Joey Gouly, Andrew Jones, Alexandru Elisei,
Oliver Upton, Jing Zhang
Introduce a new test case to validate Stage-2 MMU fault handling. The
test verifies that the hypervisor correctly identifies and handles
Stage-2 data aborts triggered by a guest accessing unmapped memory.
The test performs the following:
- Sets up a guest with Stage-1 disabled, using identity-mapped host
code and shared data in the Stage-2 page tables.
- Triggers a Stage-2 data abort by accessing a specific unmapped IPA.
- Catches the exception in the host, verifies the fault address,
and dynamically maps a new page to resolve the fault.
- Resumes the guest to confirm the memory access completes successfully
and the fault handler functioned as expected.
Signed-off-by: Jing Zhang <jingzhangos@google.com>
---
arm/Makefile.arm64 | 1 +
arm/stage2-mmu-test.c | 100 ++++++++++++++++++++++++++++++++++++++++++
2 files changed, 101 insertions(+)
create mode 100644 arm/stage2-mmu-test.c
diff --git a/arm/Makefile.arm64 b/arm/Makefile.arm64
index 9026fd71..e547f92d 100644
--- a/arm/Makefile.arm64
+++ b/arm/Makefile.arm64
@@ -67,6 +67,7 @@ tests += $(TEST_DIR)/cache.$(exe)
tests += $(TEST_DIR)/debug.$(exe)
tests += $(TEST_DIR)/fpu.$(exe)
tests += $(TEST_DIR)/mte.$(exe)
+tests += $(TEST_DIR)/stage2-mmu-test.$(exe)
include $(SRCDIR)/$(TEST_DIR)/Makefile.common
diff --git a/arm/stage2-mmu-test.c b/arm/stage2-mmu-test.c
new file mode 100644
index 00000000..391c28f0
--- /dev/null
+++ b/arm/stage2-mmu-test.c
@@ -0,0 +1,100 @@
+/*
+ * ARM64 Stage-2 MMU Demand Paging Test
+ *
+ * This test validates stage-2 data abort handling by purposefully
+ * accessing unmapped memory in the guest and verifying that the
+ * host correctly handles the fault by mapping the page.
+ *
+ * Copyright (C) 2026 Google LLC.
+ * Author: Jing Zhang <jingzhangos@google.com>
+ *
+ * SPDX-License-Identifier: LGPL-2.0-or-later
+ */
+#include <libcflat.h>
+#include <alloc_page.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/guest.h>
+#include <asm/stage2_mmu.h>
+
+#define TEST_PAGE_IPA 0x40000000UL
+#define FAULT_ADDR_IPA 0x50000000UL
+#define TEST_DATA 0xBEEFCAFEUL
+
+static volatile bool handled = false;
+
+/*
+ * Guest (EL1) payload.  The volatile access order below IS the test:
+ * the first store faults at stage-2 (FAULT_ADDR_IPA is deliberately
+ * unmapped), the host maps a page and resumes, and the retried store
+ * then succeeds.
+ */
+static void guest_code(void)
+{
+ volatile unsigned long *test_va = (void *)TEST_PAGE_IPA;
+ volatile unsigned long *fault_va = (void *)FAULT_ADDR_IPA;
+
+ /* Triggers the stage-2 data abort the host is waiting for. */
+ *fault_va = *test_va;
+
+ /* Read back through the freshly-mapped page to confirm the write. */
+ if (*fault_va == *test_va)
+ handled = true;
+
+ /* Return control to the host: HVC traps to EL2. */
+ asm("hvc #0");
+}
+
+/*
+ * Host-side driver: builds the guest's stage-2 mappings, runs the guest
+ * and services the expected stage-2 data abort by demand-mapping the
+ * faulting IPA.  Any other exit is reported as a failure.
+ */
+int main(int argc, char **argv)
+{
+ struct guest *guest;
+ unsigned long *test_page, *fixup_page;
+ unsigned long code_va_base, code_pa_base, data_base, far, ec;
+ int faults = 0;
+
+ report_prefix_push("stage2-mmu");
+
+ guest = guest_create(smp_processor_id(), guest_code, S2_PAGE_4K);
+
+ /* Map host code: IPA(VA) -> PA */
+ /* We use the host VA as the Guest IPA because guest stage 1 is disabled. */
+ code_va_base = (unsigned long)guest_code;
+ code_pa_base = virt_to_phys((void *)guest_code);
+
+ /* Align to 2MB to use block descriptors where possible */
+ code_va_base = code_va_base & ~(SZ_2M - 1);
+ code_pa_base = code_pa_base & ~(SZ_2M - 1);
+ s2mmu_map(guest->s2mmu, code_va_base, code_pa_base, SZ_2M, S2_MAP_RW);
+
+ /* Identity map the shared variable */
+ data_base = virt_to_phys((void *)&handled) & PAGE_MASK;
+ s2mmu_map(guest->s2mmu, data_base, data_base, PAGE_SIZE, S2_MAP_RW);
+
+ /* Map test data page */
+ test_page = alloc_page();
+ *test_page = TEST_DATA;
+ s2mmu_map(guest->s2mmu, TEST_PAGE_IPA, virt_to_phys(test_page), PAGE_SIZE, S2_MAP_RW);
+
+ report_info("CPU%d: entering guest...", smp_processor_id());
+
+ while (1) {
+ guest_run(guest);
+
+ if (guest->exit_code == GUEST_VECTOR_SYNC) {
+ ec = guest->esr_el2 >> ESR_ELx_EC_SHIFT;
+ if (ec == ESR_ELx_EC_HVC64) {
+ report_info("CPU%d: Guest exited via HVC.", smp_processor_id());
+ break;
+ } else if (ec == ESR_ELx_EC_DABT_LOW) {
+ far = guest->far_el2;
+ /*
+ * Tolerate exactly one fault at the expected IPA.
+ * A repeat means the fixup mapping did not take
+ * effect and we would otherwise spin (and leak a
+ * page per iteration) forever.
+ */
+ if (far == FAULT_ADDR_IPA && faults++ == 0) {
+ fixup_page = alloc_page();
+ s2mmu_map(guest->s2mmu, FAULT_ADDR_IPA,
+ virt_to_phys(fixup_page), PAGE_SIZE, S2_MAP_RW);
+ report(true, "Caught stage-2 fault at 0x%lx", far);
+ } else {
+ report(false, "Unexpected fault at 0x%lx (count %d)", far, faults);
+ break;
+ }
+ } else {
+ report(false, "Unexpected exception class: 0x%lx", ec);
+ break;
+ }
+ } else {
+ /* IRQ/FIQ/SError exits are not expected by this test. */
+ report(false, "Unexpected exit code: %lu", guest->exit_code);
+ break;
+ }
+ }
+
+ report(handled, "Stage-2 fault handling test completed");
+ guest_destroy(guest);
+
+ return report_summary();
+}
--
2.53.0.851.ga537e3e6e9-goog
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [kvm-unit-tests PATCH v1 2/3] lib: arm64: Add bare-metal guest execution framework
2026-03-16 22:43 ` [kvm-unit-tests PATCH v1 2/3] lib: arm64: Add bare-metal guest execution framework Jing Zhang
@ 2026-03-17 1:46 ` Yao Yuan
2026-03-17 8:09 ` Marc Zyngier
` (2 subsequent siblings)
3 siblings, 0 replies; 10+ messages in thread
From: Yao Yuan @ 2026-03-17 1:46 UTC (permalink / raw)
To: Jing Zhang
Cc: KVM, KVMARM, Marc Zyngier, Joey Gouly, Andrew Jones,
Alexandru Elisei, Oliver Upton
On Mon, Mar 16, 2026 at 03:43:48PM +0800, Jing Zhang wrote:
> To test advanced KVM features such as nested virtualization (NV) and
> GICv4 direct interrupt injection, kvm-unit-tests needs the ability to
> act as an L1 hypervisor running at EL2 and manage its own L2 guests.
>
> Introduce a lightweight guest management library that provides the
> infrastructure to create, configure, and execute nested guests.
>
> This framework includes:
> - Guest lifecycle management: `guest_create()` and `guest_destroy()`
> APIs to allocate guest context and setup Stage-2 identity mappings
> for code and stack using the s2mmu library.
> - Context switching: The `guest_run()` assembly routine handles
> saving the host (L1) callee-saved registers and loading the guest
> (L2) GPRs and EL1 system registers.
> - VM-Exit handling: Installs an EL2 trap handler (`guest_hyp_vectors`)
> to intercept guest exits and route them to `guest_c_exception_handler`
> to determine whether to return to the host test logic or resume.
> - Guest-internal exceptions: Provides `guest_el1_vectors` to catch
> Sync, IRQ, FIQ, and SError exceptions occurring entirely within the
> guest (EL1) without trapping to the host.
>
> Signed-off-by: Jing Zhang <jingzhangos@google.com>
> ---
> arm/Makefile.arm64 | 2 +
> lib/arm64/asm/guest.h | 156 ++++++++++++++++++++++++
> lib/arm64/guest.c | 197 ++++++++++++++++++++++++++++++
> lib/arm64/guest_arch.S | 263 +++++++++++++++++++++++++++++++++++++++++
> 4 files changed, 618 insertions(+)
> create mode 100644 lib/arm64/asm/guest.h
> create mode 100644 lib/arm64/guest.c
> create mode 100644 lib/arm64/guest_arch.S
>
> diff --git a/arm/Makefile.arm64 b/arm/Makefile.arm64
> index 5e50f5ba..9026fd71 100644
> --- a/arm/Makefile.arm64
> +++ b/arm/Makefile.arm64
> @@ -41,6 +41,8 @@ cflatobjs += lib/arm64/processor.o
> cflatobjs += lib/arm64/spinlock.o
> cflatobjs += lib/arm64/gic-v3-its.o lib/arm64/gic-v3-its-cmd.o
> cflatobjs += lib/arm64/stage2_mmu.o
> +cflatobjs += lib/arm64/guest.o
> +cflatobjs += lib/arm64/guest_arch.o
>
> ifeq ($(CONFIG_EFI),y)
> cflatobjs += lib/acpi.o
> diff --git a/lib/arm64/asm/guest.h b/lib/arm64/asm/guest.h
> new file mode 100644
> index 00000000..1d70873d
> --- /dev/null
> +++ b/lib/arm64/asm/guest.h
> @@ -0,0 +1,156 @@
> +/*
> + * Copyright (C) 2026, Google LLC.
> + * Author: Jing Zhang <jingzhangos@google.com>
> + *
> + * SPDX-License-Identifier: LGPL-2.0-or-later
> + */
> +#ifndef _ASMARM64_GUEST_H_
> +#define _ASMARM64_GUEST_H_
> +
> +/* Offsets for assembly (Must match struct guest) */
> +#define GUEST_X_OFFSET 0
> +#define GUEST_ELR_OFFSET 248
> +#define GUEST_SPSR_OFFSET 256
> +#define GUEST_HCR_OFFSET 264
> +#define GUEST_VTTBR_OFFSET 272
> +#define GUEST_SCTLR_OFFSET 280
> +#define GUEST_VBAR_OFFSET 288
> +#define GUEST_SP_EL1_OFFSET 296
> +#define GUEST_ESR_OFFSET 304
> +#define GUEST_FAR_OFFSET 312
> +#define GUEST_HPFAR_OFFSET 320
> +#define GUEST_EXIT_CODE_OFFSET 328
> +#define GUEST_TPIDR_EL1_OFFSET 336
> +#define GUEST_ICH_VMCR_EL2_OFFSET 344
> +
> +#ifndef __ASSEMBLY__
> +
> +#include <libcflat.h>
> +#include <asm/stage2_mmu.h>
> +
> +/* HCR_EL2 Definitions */
> +#define HCR_VM (1UL << 0) /* Virtualization Enable */
> +#define HCR_FMO (1UL << 3) /* Physical FIQ Routing */
> +#define HCR_IMO (1UL << 4) /* Physical IRQ Routing */
> +#define HCR_AMO (1UL << 5) /* Physical SError Interrupt Routing */
> +#define HCR_RW (1UL << 31) /* Execution State: AArch64 */
> +#define HCR_DC (1UL << 12) /* Default Cacheable */
> +#define HCR_E2H (1UL << 34) /* EL2 Host */
> +
> +#define HCR_GUEST_FLAGS (HCR_VM | HCR_FMO | HCR_IMO | HCR_AMO | HCR_RW | \
> + HCR_DC | HCR_E2H)
Setting HCR_DC works for the test in patch 03, but for broader
testing in L2 I suspect the L2 guest will need its own stage-1
paging. That could be done by mapping L1 VAs to L1 PAs in L2's
page tables (as the kselftests do), and then handling IPA->PA with
an identity mapping in L1's stage-2 tables.
> +
> +/* ICH_VMCR_EL2 bit definition */
> +#define ICH_VMCR_PMR_SHIFT 24
> +#define ICH_VMCR_PMR_MASK (0xffUL << ICH_VMCR_PMR_SHIFT)
> +#define ICH_VMCR_ENG0_SHIFT 0
> +#define ICH_VMCR_ENG0_MASK (1 << ICH_VMCR_ENG0_SHIFT)
> +#define ICH_VMCR_ENG1_SHIFT 1
> +#define ICH_VMCR_ENG1_MASK (1 << ICH_VMCR_ENG1_SHIFT)
> +
> +/* Guest stack size */
> +#define GUEST_STACK_SIZE SZ_64K
> +
> +/*
> + * Result from Handler:
> + * RESUME: Keep guest running (ERET immediately)
> + * EXIT: Return to Host C caller
> + */
> +enum guest_handler_result {
> + GUEST_ACTION_RESUME,
> + GUEST_ACTION_EXIT
> +};
> +
> +struct guest;
> +typedef enum guest_handler_result (*guest_handler_t)(struct guest *guest);
> +
> +/* EL1 (Guest-internal) Exception Vector */
> +enum guest_el1_vector {
> + GUEST_EL1_SYNC,
> + GUEST_EL1_IRQ,
> + GUEST_EL1_FIQ,
> + GUEST_EL1_SERROR,
> + GUEST_EL1_MAX
> +};
> +
> +/*
> + * Guest EL1 Exception Frame (pushed to guest stack by asm stub)
> + * We use a simplified frame: x0-x30, elr, spsr. size = 33*8
> + */
> +struct guest_el1_regs {
> + unsigned long regs[31];
> + unsigned long elr;
> + unsigned long spsr;
> +};
> +
> +typedef void (*guest_el1_handler_t)(struct guest_el1_regs *regs, unsigned int esr);
> +
> +/* Exceptions from the Guest (Lower EL using AArch64) */
> +enum guest_vector {
> + GUEST_VECTOR_SYNC,
> + GUEST_VECTOR_IRQ,
> + GUEST_VECTOR_FIQ,
> + GUEST_VECTOR_SERROR,
> + GUEST_VECTOR_MAX
> +};
> +
> +/*
> + * Guest Context Structure
> + * This will be pointed to by TPIDR_EL1 while the guest is running.
> + */
> +struct guest_context {
> + guest_el1_handler_t handlers[GUEST_EL1_MAX];
> +};
> +
> +struct guest {
> + /* 0x000: General Purpose Registers */
> + unsigned long x[31]; /* x0..x30 */
> +
> + /* 0x0F8: Execution State */
> + unsigned long elr_el2;
> + unsigned long spsr_el2;
> +
> + /* 0x108: Control Registers */
> + unsigned long hcr_el2;
> + unsigned long vttbr_el2;
> + unsigned long sctlr_el1;
> + unsigned long vbar_el1;
> + unsigned long sp_el1;
> +
> + /* 0x130: Exit Information */
> + unsigned long esr_el2;
> + unsigned long far_el2;
> + unsigned long hpfar_el2;
> + unsigned long exit_code; /* enum guest_vector */
> + unsigned long tpidr_el1;
> +
> + /* 0x158: GIC Registers */
> + unsigned long ich_vmcr_el2;
> +
> + /* 0x160: Exception Handlers */
> + guest_handler_t handlers[GUEST_VECTOR_MAX];
> + struct guest_context *guest_context;
> +
> + struct s2_mmu *s2mmu;
> +};
> +
> +/* API */
> +struct guest *guest_create(int vmid, void (*guest_func)(void), enum s2_granule granule);
> +void guest_destroy(struct guest *guest);
> +
> +/* Configuration */
> +void guest_set_vector(struct guest *guest, void *vector_table);
> +void guest_set_stack(struct guest *guest, void *stack_top);
> +void guest_install_handler(struct guest *guest, enum guest_vector v, guest_handler_t handler);
> +
> +/* Install handler for exceptions INSIDE EL1 */
> +void guest_install_el1_handler(struct guest *guest, enum guest_el1_vector v, guest_el1_handler_t handler);
> +
> +unsigned long guest_c_exception_handler(struct guest *guest, unsigned long vector_offset);
> +void guest_el1_c_handler(struct guest_el1_regs *regs, unsigned int vector);
> +
> +/* Core Run Loop */
> +void guest_run(struct guest *guest);
> +
> +#endif /* __ASSEMBLY__ */
> +#endif /* _ASMARM64_GUEST_H_ */
> diff --git a/lib/arm64/guest.c b/lib/arm64/guest.c
> new file mode 100644
> index 00000000..6c256c11
> --- /dev/null
> +++ b/lib/arm64/guest.c
> @@ -0,0 +1,197 @@
> +/*
> + * Copyright (C) 2026, Google LLC.
> + * Author: Jing Zhang <jingzhangos@google.com>
> + *
> + * SPDX-License-Identifier: LGPL-2.0-or-later
> + */
> +#include <libcflat.h>
> +#include <asm/guest.h>
> +#include <asm/io.h>
> +#include <asm/sysreg.h>
> +#include <asm/barrier.h>
> +#include <alloc_page.h>
> +#include <alloc.h>
> +
> +/* Compile-time checks to ensure Assembly macros match C Struct */
> +_Static_assert(offsetof(struct guest, x) == GUEST_X_OFFSET,
> + "GUEST_X_OFFSET mismatch");
> +_Static_assert(offsetof(struct guest, elr_el2) == GUEST_ELR_OFFSET,
> + "GUEST_ELR_OFFSET mismatch");
> +_Static_assert(offsetof(struct guest, spsr_el2) == GUEST_SPSR_OFFSET,
> + "GUEST_SPSR_OFFSET mismatch");
> +_Static_assert(offsetof(struct guest, hcr_el2) == GUEST_HCR_OFFSET,
> + "GUEST_HCR_OFFSET mismatch");
> +_Static_assert(offsetof(struct guest, vttbr_el2) == GUEST_VTTBR_OFFSET,
> + "GUEST_VTTBR_OFFSET mismatch");
> +_Static_assert(offsetof(struct guest, sctlr_el1) == GUEST_SCTLR_OFFSET,
> + "GUEST_SCTLR_OFFSET mismatch");
> +_Static_assert(offsetof(struct guest, vbar_el1) == GUEST_VBAR_OFFSET,
> + "GUEST_VBAR_OFFSET mismatch");
> +_Static_assert(offsetof(struct guest, sp_el1) == GUEST_SP_EL1_OFFSET,
> + "GUEST_SP_EL1_OFFSET mismatch");
> +_Static_assert(offsetof(struct guest, esr_el2) == GUEST_ESR_OFFSET,
> + "GUEST_ESR_OFFSET mismatch");
> +_Static_assert(offsetof(struct guest, far_el2) == GUEST_FAR_OFFSET,
> + "GUEST_FAR_OFFSET mismatch");
> +_Static_assert(offsetof(struct guest, hpfar_el2) == GUEST_HPFAR_OFFSET,
> + "GUEST_HPFAR_OFFSET mismatch");
> +_Static_assert(offsetof(struct guest, exit_code) == GUEST_EXIT_CODE_OFFSET,
> + "GUEST_EXIT_CODE_OFFSET mismatch");
> +_Static_assert(offsetof(struct guest, tpidr_el1) == GUEST_TPIDR_EL1_OFFSET,
> + "GUEST_TPIDR_EL1_OFFSET mismatch");
> +_Static_assert(offsetof(struct guest, ich_vmcr_el2) == GUEST_ICH_VMCR_EL2_OFFSET,
> + "GUEST_ICH_VMCR_EL2_OFFSET mismatch");
> +
> +/*
> + * C-Entry for Exception Handling
> + * Returns 0 to Resume Guest, 1 to Exit to Host Caller
> + */
> +unsigned long guest_c_exception_handler(struct guest *guest, unsigned long vector_offset)
> +{
> + enum guest_vector vector = (enum guest_vector)guest->exit_code;
> +
> + /* Save Trap Info */
> + guest->esr_el2 = read_sysreg(esr_el2);
> + guest->far_el2 = read_sysreg(far_el2);
> + guest->hpfar_el2 = read_sysreg(hpfar_el2);
> +
> + /* Invoke Handler if registered */
> + if (guest->handlers[vector]) {
> + if (guest->handlers[vector](guest) == GUEST_ACTION_RESUME) {
> + return 0; /* ASM stub will restore and ERET */
> + }
> + }
> +
> + /* Default: Exit to caller */
> + return 1;
> +}
> +
> +/* --- EL1 (Guest-Internal) Vector Handling --- */
> +
> +void guest_install_el1_handler(struct guest *guest, enum guest_el1_vector v, guest_el1_handler_t handler)
> +{
> + if (guest && guest->guest_context && v < GUEST_EL1_MAX)
> + guest->guest_context->handlers[v] = handler;
> +}
> +
> +void guest_el1_c_handler(struct guest_el1_regs *regs, unsigned int vector)
> +{
> + struct guest_context *ctx = (struct guest_context *)read_sysreg(tpidr_el1);
> + unsigned int esr = read_sysreg(esr_el1);
> +
> + if (ctx && vector < GUEST_EL1_MAX && ctx->handlers[vector]) {
> + ctx->handlers[vector](regs, esr);
> + } else {
> + printf("Guest: Unhandled Exception Vector %d, ESR=0x%x\n", vector, esr);
> + asm volatile("hvc #0xFFFF");
> + }
> +}
> +
> +extern void guest_el1_vectors(void);
> +
> +static struct guest *__guest_create(struct s2_mmu *s2_ctx, void *entry_point)
> +{
> + struct guest *guest = calloc(1, sizeof(struct guest));
> + struct guest_context *guest_ctx;
> + unsigned long guest_ctx_pa;
> +
> + /* Allocate the internal context table */
> + guest_ctx = (void *)alloc_page();
> + memset(guest_ctx, 0, PAGE_SIZE);
> + guest->guest_context = guest_ctx;
> +
> + guest_ctx_pa = virt_to_phys(guest_ctx);
> + if (s2_ctx)
> + s2mmu_map(s2_ctx, guest_ctx_pa, guest_ctx_pa, PAGE_SIZE, S2_MAP_RW);
> +
> + guest->tpidr_el1 = guest_ctx_pa;;
> +
> + guest->elr_el2 = (unsigned long)entry_point;
> + guest->spsr_el2 = 0x3C5; /* M=EL1h, DAIF=Masked */
> + guest->hcr_el2 = HCR_GUEST_FLAGS;
> +
> + if (s2_ctx) {
> + guest->vttbr_el2 = virt_to_phys(s2_ctx->pgd);
> + guest->vttbr_el2 |= ((unsigned long)s2_ctx->vmid << 48);
> + }
> +
> + guest->sctlr_el1 = read_sysreg(sctlr_el1);
> + guest->sctlr_el1 |= SCTLR_EL1_C | SCTLR_EL1_I | SCTLR_EL1_M;
> +
> + guest->ich_vmcr_el2 = read_sysreg(ich_vmcr_el2);
> + guest->ich_vmcr_el2 |= (0xFFUL << ICH_VMCR_PMR_SHIFT) | (1UL << ICH_VMCR_ENG1_SHIFT);
> +
> + guest->vbar_el1 = (unsigned long)guest_el1_vectors;
> + guest->s2mmu = s2_ctx;
> +
> + return guest;
> +}
> +
> +struct guest *guest_create(int vmid, void (*guest_func)(void), enum s2_granule granule)
> +{
> + unsigned long guest_pa, code_base, stack_pa;
> + unsigned long *stack_page;
> + struct guest *guest;
> + struct s2_mmu *ctx;
> +
> + ctx = s2mmu_init(vmid, granule, true);
> + /*
> + * Map the Host's code segment Identity Mapped (IPA=PA).
> + * To be safe, we map a large chunk (e.g., 2MB) around the function
> + * to capture any helper functions the compiler might generate calls to.
> + */
> + guest_pa = virt_to_phys((void *)guest_func);
> + code_base = guest_pa & ~(SZ_2M - 1);
> + s2mmu_map(ctx, code_base, code_base, SZ_2M, S2_MAP_RW);
> +
> + /*
> + * Map Stack
> + * Allocate 16 pages (64K) in Host, get its PA, and map it for Guest.
> + */
> + stack_page = alloc_pages(get_order(GUEST_STACK_SIZE >> PAGE_SHIFT));
> + stack_pa = virt_to_phys(stack_page);
> + /* Identity Map it (IPA = PA) */
> + s2mmu_map(ctx, stack_pa, stack_pa, GUEST_STACK_SIZE, S2_MAP_RW);
> +
> + s2mmu_enable(ctx);
> +
> + /* Create Guest */
> + /* Entry point is the PA of the function (Identity Mapped) */
> + guest = __guest_create(ctx, (void *)guest_pa);
> +
> + /*
> + * Setup Guest Stack Pointer
> + * Must match where we mapped the stack + Offset
> + */
> + guest_set_stack(guest, (void *)(stack_pa + GUEST_STACK_SIZE));
> +
> + /* Map UART identity mapped, printf() available to guest */
> + s2mmu_map(ctx, 0x09000000, 0x09000000, PAGE_SIZE, S2_MAP_DEVICE);
> +
> + return guest;
> +}
> +
> +void guest_destroy(struct guest *guest)
> +{
> + s2mmu_disable(guest->s2mmu);
> + s2mmu_destroy(guest->s2mmu);
> + if (guest->guest_context)
> + free_page(guest->guest_context);
> + free(guest);
> +}
> +
> +void guest_set_vector(struct guest *guest, void *vector_table)
> +{
> + guest->vbar_el1 = (unsigned long)vector_table;
> +}
> +
> +void guest_set_stack(struct guest *guest, void *stack_top)
> +{
> + guest->sp_el1 = (unsigned long)stack_top;
> +}
> +
> +void guest_install_handler(struct guest *guest, enum guest_vector v, guest_handler_t handler)
> +{
> + if (v < GUEST_VECTOR_MAX)
> + guest->handlers[v] = handler;
> +}
> diff --git a/lib/arm64/guest_arch.S b/lib/arm64/guest_arch.S
> new file mode 100644
> index 00000000..cb7074d7
> --- /dev/null
> +++ b/lib/arm64/guest_arch.S
> @@ -0,0 +1,263 @@
> +/*
> + * Copyright (C) 2026, Google LLC.
> + * Author: Jing Zhang <jingzhangos@google.com>
> + *
> + * SPDX-License-Identifier: LGPL-2.0-or-later
> + */
> +#define __ASSEMBLY__
> +#include <asm/guest.h>
> +
> +.global guest_run
> +guest_run:
> + /* x0 = struct guest pointer */
> +
> + /* Save Host Callee-Saved Regs */
> + stp x29, x30, [sp, #-16]!
> + stp x27, x28, [sp, #-16]!
> + stp x25, x26, [sp, #-16]!
> + stp x23, x24, [sp, #-16]!
> + stp x21, x22, [sp, #-16]!
> + stp x19, x20, [sp, #-16]!
> +
> + /* Cache Guest Pointer in TPIDR_EL2 */
> + msr tpidr_el2, x0
> +
> + /* Configure ICC_SRE_EL2 to allow EL1 access to SysRegs */
> + /* Bit 3 (Enable) = 1, Bit 0 (SRE) = 1 */
> + mrs x1, icc_sre_el2
> + orr x1, x1, #1
> + orr x1, x1, #(1 << 3)
> + msr icc_sre_el2, x1
> + isb
> +
> + /* Enable virtual CPU interface */
> + mrs x1, ich_hcr_el2
> + orr x1, x1, #1
> + msr ich_hcr_el2, x1
> +
> + /* Load Guest System Registers */
> + ldr x1, [x0, #GUEST_ELR_OFFSET]
> + msr elr_el2, x1
> + ldr x1, [x0, #GUEST_SPSR_OFFSET]
> + msr spsr_el2, x1
> + ldr x1, [x0, #GUEST_HCR_OFFSET]
> + msr hcr_el2, x1
> + ldr x1, [x0, #GUEST_VTTBR_OFFSET]
> + msr vttbr_el2, x1
> + ldr x1, [x0, #GUEST_SCTLR_OFFSET]
> + msr S3_5_c1_c0_0, x1
> + ldr x1, [x0, #GUEST_VBAR_OFFSET]
> + msr S3_5_c12_c0_0, x1
> + ldr x1, [x0, #GUEST_SP_EL1_OFFSET]
> + msr sp_el1, x1
> + ldr x1, [x0, #GUEST_TPIDR_EL1_OFFSET]
> + msr tpidr_el1, x1
> + ldr x1, [x0, #GUEST_ICH_VMCR_EL2_OFFSET]
> + msr ich_vmcr_el2, x1
> +
> + /* Load Guest GPRs */
> + ldp x1, x2, [x0, #8]
> + ldp x3, x4, [x0, #24]
> + ldp x5, x6, [x0, #40]
> + ldp x7, x8, [x0, #56]
> + ldp x9, x10, [x0, #72]
> + ldp x11, x12, [x0, #88]
> + ldp x13, x14, [x0, #104]
> + ldp x15, x16, [x0, #120]
> + ldp x17, x18, [x0, #136]
> + ldp x19, x20, [x0, #152]
> + ldp x21, x22, [x0, #168]
> + ldp x23, x24, [x0, #184]
> + ldp x25, x26, [x0, #200]
> + ldp x27, x28, [x0, #216]
> + ldp x29, x30, [x0, #232]
> + ldr x0, [x0, #0]
> +
> + /* Install Trap Handler */
> + adrp x29, guest_hyp_vectors
> + add x29, x29, :lo12:guest_hyp_vectors
> + msr vbar_el2, x29
> +
> + /* Restore x29 from struct (via tpidr_el2) */
> + mrs x29, tpidr_el2
> + ldr x29, [x29, #232]
> +
> + isb
> + eret
> +
> + .align 11
> +guest_hyp_vectors:
> + .skip 0x400
> +
> +guest_exit_sync:
> + stp x0, x1, [sp, #-16]!
> + mrs x0, tpidr_el2
> + mov x1, #0
> + str x1, [x0, #GUEST_EXIT_CODE_OFFSET]
> + b guest_common_exit
> +
> + .balign 0x80
> +
> +guest_exit_irq:
> + stp x0, x1, [sp, #-16]!
> + mrs x0, tpidr_el2
> + mov x1, #1
> + str x1, [x0, #GUEST_EXIT_CODE_OFFSET]
> + b guest_common_exit
> +
> +guest_common_exit:
> + stp x2, x3, [x0, #16]
> + stp x4, x5, [x0, #32]
> + stp x6, x7, [x0, #48]
> + stp x8, x9, [x0, #64]
> + stp x10, x11, [x0, #80]
> + stp x12, x13, [x0, #96]
> + stp x14, x15, [x0, #112]
> + stp x16, x17, [x0, #128]
> + stp x18, x19, [x0, #144]
> + stp x20, x21, [x0, #160]
> + stp x22, x23, [x0, #176]
> + stp x24, x25, [x0, #192]
> + stp x26, x27, [x0, #208]
> + stp x28, x29, [x0, #224]
> + str x30, [x0, #240]
> +
> + ldp x2, x3, [sp], #16
> + stp x2, x3, [x0, #0]
> +
> + mrs x1, elr_el2
> + str x1, [x0, #GUEST_ELR_OFFSET]
> + mrs x1, spsr_el2
> + str x1, [x0, #GUEST_SPSR_OFFSET]
> + mrs x1, esr_el2
> + str x1, [x0, #GUEST_ESR_OFFSET]
> + mrs x1, far_el2
> + str x1, [x0, #GUEST_FAR_OFFSET]
> + mrs x1, hpfar_el2
> + str x1, [x0, #GUEST_HPFAR_OFFSET]
> + mrs x1, sp_el1
> + str x1, [x0, #GUEST_SP_EL1_OFFSET]
> + mrs x1, ich_vmcr_el2
> + str x1, [x0, #GUEST_ICH_VMCR_EL2_OFFSET]
> +
> + /* x29 contains vector offset from entry */
> + mov x1, x29
> + bl guest_c_exception_handler
> + cbz x0, guest_resume_guest
> +
> + /* EXIT */
> + /* Restore Host Callee-Saved Regs */
> + ldp x19, x20, [sp], #16
> + ldp x21, x22, [sp], #16
> + ldp x23, x24, [sp], #16
> + ldp x25, x26, [sp], #16
> + ldp x27, x28, [sp], #16
> + ldp x29, x30, [sp], #16
> + ret
> +
> + /* RESUME */
> +guest_resume_guest:
> + mrs x0, tpidr_el2
> + ldr x1, [x0, #GUEST_ELR_OFFSET]
> + msr elr_el2, x1
> + ldr x1, [x0, #GUEST_SPSR_OFFSET]
> + msr spsr_el2, x1
> + ldr x1, [x0, #GUEST_SP_EL1_OFFSET]
> + msr sp_el1, x1
> +
> + ldp x1, x2, [x0, #8]
> + ldp x3, x4, [x0, #24]
> + ldp x5, x6, [x0, #40]
> + ldp x7, x8, [x0, #56]
> + ldp x9, x10, [x0, #72]
> + ldp x11, x12, [x0, #88]
> + ldp x13, x14, [x0, #104]
> + ldp x15, x16, [x0, #120]
> + ldp x17, x18, [x0, #136]
> + ldp x19, x20, [x0, #152]
> + ldp x21, x22, [x0, #168]
> + ldp x23, x24, [x0, #184]
> + ldp x25, x26, [x0, #200]
> + ldp x27, x28, [x0, #216]
> + ldp x29, x30, [x0, #232]
> + ldr x0, [x0, #0]
> + eret
> +
> +/* EL1 Vector Table */
> +.align 11
> +.global guest_el1_vectors
> +guest_el1_vectors:
> + /* Sync (0x000) */
> + .skip 0x200
> + /* Sync (0x200) */
> + stp x29, x30, [sp, #-16]!
> + mov x29, #0
> + b guest_el1_common
> + .skip 0x80 - 12
> + /* IRQ (0x280) */
> + stp x29, x30, [sp, #-16]!
> + mov x29, #1
> + b guest_el1_common
> + .skip 0x80 - 12
> + /* FIQ (0x300) */
> + stp x29, x30, [sp, #-16]!
> + mov x29, #2
> + b guest_el1_common
> + .skip 0x80 - 12
> + /* SError (0x380) */
> + stp x29, x30, [sp, #-16]!
> + mov x29, #3
> + b guest_el1_common
> + .skip 0x400
> +
> +guest_el1_common:
> + sub sp, sp, #264
> + stp x0, x1, [sp, #0]
> + stp x2, x3, [sp, #16]
> + stp x4, x5, [sp, #32]
> + stp x6, x7, [sp, #48]
> + stp x8, x9, [sp, #64]
> + stp x10, x11, [sp, #80]
> + stp x12, x13, [sp, #96]
> + stp x14, x15, [sp, #112]
> + stp x16, x17, [sp, #128]
> + stp x18, x19, [sp, #144]
> + stp x20, x21, [sp, #160]
> + stp x22, x23, [sp, #176]
> + stp x24, x25, [sp, #192]
> + stp x26, x27, [sp, #208]
> + stp x28, x30, [sp, #224]
> +
> + mrs x0, elr_el1
> + str x0, [sp, #248]
> + mrs x0, spsr_el1
> + str x0, [sp, #256]
> +
> + mov x0, sp
> + mov x1, x29
> + bl guest_el1_c_handler
> +
> + ldr x0, [sp, #248]
> + msr elr_el1, x0
> + ldr x0, [sp, #256]
> + msr spsr_el1, x0
> +
> + ldp x0, x1, [sp, #0]
> + ldp x2, x3, [sp, #16]
> + ldp x4, x5, [sp, #32]
> + ldp x6, x7, [sp, #48]
> + ldp x8, x9, [sp, #64]
> + ldp x10, x11, [sp, #80]
> + ldp x12, x13, [sp, #96]
> + ldp x14, x15, [sp, #112]
> + ldp x16, x17, [sp, #128]
> + ldp x18, x19, [sp, #144]
> + ldp x20, x21, [sp, #160]
> + ldp x22, x23, [sp, #176]
> + ldp x24, x25, [sp, #192]
> + ldp x26, x27, [sp, #208]
> + ldp x28, x30, [sp, #224]
> +
> + add sp, sp, #264
> + ldp x29, x30, [sp], #16
> + eret
> --
> 2.53.0.851.ga537e3e6e9-goog
>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [kvm-unit-tests PATCH v1 2/3] lib: arm64: Add bare-metal guest execution framework
2026-03-16 22:43 ` [kvm-unit-tests PATCH v1 2/3] lib: arm64: Add bare-metal guest execution framework Jing Zhang
2026-03-17 1:46 ` Yao Yuan
@ 2026-03-17 8:09 ` Marc Zyngier
2026-03-24 15:04 ` Joey Gouly
2026-03-24 15:44 ` Wei-Lin Chang
3 siblings, 0 replies; 10+ messages in thread
From: Marc Zyngier @ 2026-03-17 8:09 UTC (permalink / raw)
To: Jing Zhang
Cc: KVM, KVMARM, Joey Gouly, Andrew Jones, Alexandru Elisei,
Oliver Upton
On Mon, 16 Mar 2026 22:43:48 +0000,
Jing Zhang <jingzhangos@google.com> wrote:
>
> To test advanced KVM features such as nested virtualization (NV) and
> GICv4 direct interrupt injection, kvm-unit-tests needs the ability to
> act as an L1 hypervisor running at EL2 and manage its own L2 guests.
>
> Introduce a lightweight guest management library that provides the
> infrastructure to create, configure, and execute nested guests.
>
> This framework includes:
> - Guest lifecycle management: `guest_create()` and `guest_destroy()`
> APIs to allocate guest context and setup Stage-2 identity mappings
> for code and stack using the s2mmu library.
> - Context switching: The `guest_run()` assembly routine handles
> saving the host (L1) callee-saved registers and loading the guest
> (L2) GPRs and EL1 system registers.
> - VM-Exit handling: Installs an EL2 trap handler (`guest_hyp_vectors`)
> to intercept guest exits and route them to `guest_c_exception_handler`
> to determine whether to return to the host test logic or resume.
> - Guest-internal exceptions: Provides `guest_el1_vectors` to catch
> Sync, IRQ, FIQ, and SError exceptions occurring entirely within the
> guest (EL1) without trapping to the host.
>
> Signed-off-by: Jing Zhang <jingzhangos@google.com>
> ---
> arm/Makefile.arm64 | 2 +
> lib/arm64/asm/guest.h | 156 ++++++++++++++++++++++++
> lib/arm64/guest.c | 197 ++++++++++++++++++++++++++++++
> lib/arm64/guest_arch.S | 263 +++++++++++++++++++++++++++++++++++++++++
> 4 files changed, 618 insertions(+)
> create mode 100644 lib/arm64/asm/guest.h
> create mode 100644 lib/arm64/guest.c
> create mode 100644 lib/arm64/guest_arch.S
>
> diff --git a/arm/Makefile.arm64 b/arm/Makefile.arm64
> index 5e50f5ba..9026fd71 100644
> --- a/arm/Makefile.arm64
> +++ b/arm/Makefile.arm64
> @@ -41,6 +41,8 @@ cflatobjs += lib/arm64/processor.o
> cflatobjs += lib/arm64/spinlock.o
> cflatobjs += lib/arm64/gic-v3-its.o lib/arm64/gic-v3-its-cmd.o
> cflatobjs += lib/arm64/stage2_mmu.o
> +cflatobjs += lib/arm64/guest.o
> +cflatobjs += lib/arm64/guest_arch.o
>
> ifeq ($(CONFIG_EFI),y)
> cflatobjs += lib/acpi.o
> diff --git a/lib/arm64/asm/guest.h b/lib/arm64/asm/guest.h
> new file mode 100644
> index 00000000..1d70873d
> --- /dev/null
> +++ b/lib/arm64/asm/guest.h
> @@ -0,0 +1,156 @@
> +/*
> + * Copyright (C) 2026, Google LLC.
> + * Author: Jing Zhang <jingzhangos@google.com>
> + *
> + * SPDX-License-Identifier: LGPL-2.0-or-later
> + */
> +#ifndef _ASMARM64_GUEST_H_
> +#define _ASMARM64_GUEST_H_
> +
> +/* Offsets for assembly (Must match struct guest) */
> +#define GUEST_X_OFFSET 0
> +#define GUEST_ELR_OFFSET 248
> +#define GUEST_SPSR_OFFSET 256
> +#define GUEST_HCR_OFFSET 264
> +#define GUEST_VTTBR_OFFSET 272
> +#define GUEST_SCTLR_OFFSET 280
> +#define GUEST_VBAR_OFFSET 288
> +#define GUEST_SP_EL1_OFFSET 296
> +#define GUEST_ESR_OFFSET 304
> +#define GUEST_FAR_OFFSET 312
> +#define GUEST_HPFAR_OFFSET 320
> +#define GUEST_EXIT_CODE_OFFSET 328
> +#define GUEST_TPIDR_EL1_OFFSET 336
> +#define GUEST_ICH_VMCR_EL2_OFFSET 344
Don't hardcode offsets. Generate them.
> +
> +#ifndef __ASSEMBLY__
> +
> +#include <libcflat.h>
> +#include <asm/stage2_mmu.h>
> +
> +/* HCR_EL2 Definitions */
> +#define HCR_VM (1UL << 0) /* Virtualization Enable */
> +#define HCR_FMO (1UL << 3) /* Physical FIQ Routing */
> +#define HCR_IMO (1UL << 4) /* Physical IRQ Routing */
> +#define HCR_AMO (1UL << 5) /* Physical SError Interrupt Routing */
> +#define HCR_RW (1UL << 31) /* Execution State: AArch64 */
> +#define HCR_DC (1UL << 12) /* Default Cacheable */
> +#define HCR_E2H (1UL << 34) /* EL2 Host */
Please consider importing the kernel's sysreg definition, or generate
them from an official source (the architecture JSON file, for
example).
> +
> +#define HCR_GUEST_FLAGS (HCR_VM | HCR_FMO | HCR_IMO | HCR_AMO | HCR_RW | \
> + HCR_DC | HCR_E2H)
Just to set expectations: HCR_EL2.DC is not supported by KVM, and
likely never will. I'm hopeful that this bit (and a few others) will
eventually be deprecated because it serves no purpose. If you need a
1:1 S1 mapping, create it using (surprise!) page tables.
Thanks,
M.
--
Without deviation from the norm, progress is not possible.
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [kvm-unit-tests PATCH v1 0/3] arm64: Add Stage-2 MMU and Nested Guest Framework
2026-03-16 22:43 [kvm-unit-tests PATCH v1 0/3] arm64: Add Stage-2 MMU and Nested Guest Framework Jing Zhang
` (2 preceding siblings ...)
2026-03-16 22:43 ` [kvm-unit-tests PATCH v1 3/3] arm64: Add Stage-2 MMU demand paging test Jing Zhang
@ 2026-03-24 11:43 ` Joey Gouly
3 siblings, 0 replies; 10+ messages in thread
From: Joey Gouly @ 2026-03-24 11:43 UTC (permalink / raw)
To: Jing Zhang
Cc: KVM, KVMARM, Marc Zyngier, Andrew Jones, Alexandru Elisei,
Oliver Upton
On Mon, Mar 16, 2026 at 03:43:46PM -0700, Jing Zhang wrote:
> This patch series introduces a lightweight infrastructure for managing ARM64
> Stage-2 translation tables and executing nested guests. These components are
> essential for testing advanced virtualization features such as nested
> virtualization (NV) and GICv4 direct interrupt injection.
>
> The series provides a generic Stage-2 MMU library supporting multiple
> translation granules (4K, 16K, 64K) and dynamic page table management.
> Building on this, it adds a guest execution framework that handles guest
> lifecycle management, context switching and guest exit routing. A new test
> case for Stage-2 MMU demand paging to verify fault handling.
>
> Please note that this is a very preliminary implementation intended as a
> startup baseline for future work in virtualization testing. Users should be
> aware that because this is an early-stage baseline, some portions of the code
> may just happen to work in its current state. There might be critical
> architectural elements or edge-case handling missing that will need to be
> addressed as the framework matures.
>
Hi,
I'm interested in this. I had a much more barebones (aka no stage2) guest
framework thing that I sent out [1], but it seems more useful to try to get
this series merged.
I will try to get my tests working with this new framework; one thing missing
for me is support for executing at EL0.
Thanks,
Joey
[1] https://lore.kernel.org/kvmarm/20260306142656.2775185-1-joey.gouly@arm.com/#t
> ---
>
> Jing Zhang (3):
> lib: arm64: Add stage2 page table management library
> lib: arm64: Add bare-metal guest execution framework
> arm64: Add Stage-2 MMU demand paging test
>
> arm/Makefile.arm64 | 4 +
> arm/stage2-mmu-test.c | 100 +++++++++
> lib/arm64/asm/guest.h | 156 ++++++++++++++
> lib/arm64/asm/stage2_mmu.h | 74 +++++++
> lib/arm64/guest.c | 197 ++++++++++++++++++
> lib/arm64/guest_arch.S | 263 ++++++++++++++++++++++++
> lib/arm64/stage2_mmu.c | 402 +++++++++++++++++++++++++++++++++++++
> 7 files changed, 1196 insertions(+)
> create mode 100644 arm/stage2-mmu-test.c
> create mode 100644 lib/arm64/asm/guest.h
> create mode 100644 lib/arm64/asm/stage2_mmu.h
> create mode 100644 lib/arm64/guest.c
> create mode 100644 lib/arm64/guest_arch.S
> create mode 100644 lib/arm64/stage2_mmu.c
>
>
> base-commit: 86e53277ac80dabb04f4fa5fa6a6cc7649392bdc
> --
> 2.53.0.851.ga537e3e6e9-goog
>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [kvm-unit-tests PATCH v1 2/3] lib: arm64: Add bare-metal guest execution framework
2026-03-16 22:43 ` [kvm-unit-tests PATCH v1 2/3] lib: arm64: Add bare-metal guest execution framework Jing Zhang
2026-03-17 1:46 ` Yao Yuan
2026-03-17 8:09 ` Marc Zyngier
@ 2026-03-24 15:04 ` Joey Gouly
2026-03-24 15:44 ` Wei-Lin Chang
3 siblings, 0 replies; 10+ messages in thread
From: Joey Gouly @ 2026-03-24 15:04 UTC (permalink / raw)
To: Jing Zhang
Cc: KVM, KVMARM, Marc Zyngier, Andrew Jones, Alexandru Elisei,
Oliver Upton
Hi,
Just some preliminary comments.
On Mon, Mar 16, 2026 at 03:43:48PM -0700, Jing Zhang wrote:
> To test advanced KVM features such as nested virtualization (NV) and
> GICv4 direct interrupt injection, kvm-unit-tests needs the ability to
> act as an L1 hypervisor running at EL2 and manage its own L2 guests.
>
> Introduce a lightweight guest management library that provides the
> infrastructure to create, configure, and execute nested guests.
>
> This framework includes:
> - Guest lifecycle management: `guest_create()` and `guest_destroy()`
> APIs to allocate guest context and setup Stage-2 identity mappings
> for code and stack using the s2mmu library.
> - Context switching: The `guest_run()` assembly routine handles
> saving the host (L1) callee-saved registers and loading the guest
> (L2) GPRs and EL1 system registers.
> - VM-Exit handling: Installs an EL2 trap handler (`guest_hyp_vectors`)
> to intercept guest exits and route them to `guest_c_exception_handler`
> to determine whether to return to the host test logic or resume.
> - Guest-internal exceptions: Provides `guest_el1_vectors` to catch
> Sync, IRQ, FIQ, and SError exceptions occurring entirely within the
> guest (EL1) without trapping to the host.
Might be helpful if this patch could be split into 2-3 commits.
>
> Signed-off-by: Jing Zhang <jingzhangos@google.com>
> ---
> arm/Makefile.arm64 | 2 +
> lib/arm64/asm/guest.h | 156 ++++++++++++++++++++++++
> lib/arm64/guest.c | 197 ++++++++++++++++++++++++++++++
> lib/arm64/guest_arch.S | 263 +++++++++++++++++++++++++++++++++++++++++
> 4 files changed, 618 insertions(+)
> create mode 100644 lib/arm64/asm/guest.h
> create mode 100644 lib/arm64/guest.c
> create mode 100644 lib/arm64/guest_arch.S
>
> diff --git a/arm/Makefile.arm64 b/arm/Makefile.arm64
> index 5e50f5ba..9026fd71 100644
> --- a/arm/Makefile.arm64
> +++ b/arm/Makefile.arm64
> @@ -41,6 +41,8 @@ cflatobjs += lib/arm64/processor.o
> cflatobjs += lib/arm64/spinlock.o
> cflatobjs += lib/arm64/gic-v3-its.o lib/arm64/gic-v3-its-cmd.o
> cflatobjs += lib/arm64/stage2_mmu.o
> +cflatobjs += lib/arm64/guest.o
> +cflatobjs += lib/arm64/guest_arch.o
>
> ifeq ($(CONFIG_EFI),y)
> cflatobjs += lib/acpi.o
> diff --git a/lib/arm64/asm/guest.h b/lib/arm64/asm/guest.h
> new file mode 100644
> index 00000000..1d70873d
> --- /dev/null
> +++ b/lib/arm64/asm/guest.h
> @@ -0,0 +1,156 @@
> +/*
> + * Copyright (C) 2026, Google LLC.
> + * Author: Jing Zhang <jingzhangos@google.com>
> + *
> + * SPDX-License-Identifier: LGPL-2.0-or-later
> + */
> +#ifndef _ASMARM64_GUEST_H_
> +#define _ASMARM64_GUEST_H_
> +
> +/* Offsets for assembly (Must match struct guest) */
> +#define GUEST_X_OFFSET 0
> +#define GUEST_ELR_OFFSET 248
> +#define GUEST_SPSR_OFFSET 256
> +#define GUEST_HCR_OFFSET 264
> +#define GUEST_VTTBR_OFFSET 272
> +#define GUEST_SCTLR_OFFSET 280
> +#define GUEST_VBAR_OFFSET 288
> +#define GUEST_SP_EL1_OFFSET 296
> +#define GUEST_ESR_OFFSET 304
> +#define GUEST_FAR_OFFSET 312
> +#define GUEST_HPFAR_OFFSET 320
> +#define GUEST_EXIT_CODE_OFFSET 328
> +#define GUEST_TPIDR_EL1_OFFSET 336
> +#define GUEST_ICH_VMCR_EL2_OFFSET 344
Look at lib/arm64/asm-offsets.c for how to generate these.
> +
> +#ifndef __ASSEMBLY__
> +
> +#include <libcflat.h>
> +#include <asm/stage2_mmu.h>
> +
> +/* HCR_EL2 Definitions */
> +#define HCR_VM (1UL << 0) /* Virtualization Enable */
> +#define HCR_FMO (1UL << 3) /* Physical FIQ Routing */
> +#define HCR_IMO (1UL << 4) /* Physical IRQ Routing */
> +#define HCR_AMO (1UL << 5) /* Physical SError Interrupt Routing */
> +#define HCR_RW (1UL << 31) /* Execution State: AArch64 */
> +#define HCR_DC (1UL << 12) /* Default Cacheable */
> +#define HCR_E2H (1UL << 34) /* EL2 Host */
> +
Should be in lib/arm64/asm/sysreg.h
> +#define HCR_GUEST_FLAGS (HCR_VM | HCR_FMO | HCR_IMO | HCR_AMO | HCR_RW | \
> + HCR_DC | HCR_E2H)
> +
> +/* ICH_VMCR_EL2 bit definition */
> +#define ICH_VMCR_PMR_SHIFT 24
> +#define ICH_VMCR_PMR_MASK (0xffUL << ICH_VMCR_PMR_SHIFT)
> +#define ICH_VMCR_ENG0_SHIFT 0
> +#define ICH_VMCR_ENG0_MASK (1 << ICH_VMCR_ENG0_SHIFT)
> +#define ICH_VMCR_ENG1_SHIFT 1
> +#define ICH_VMCR_ENG1_MASK (1 << ICH_VMCR_ENG1_SHIFT)
> +
> +/* Guest stack size */
> +#define GUEST_STACK_SIZE SZ_64K
> +
> +/*
> + * Result from Handler:
> + * RESUME: Keep guest running (ERET immediately)
> + * EXIT: Return to Host C caller
> + */
> +enum guest_handler_result {
> + GUEST_ACTION_RESUME,
> + GUEST_ACTION_EXIT
> +};
> +
> +struct guest;
> +typedef enum guest_handler_result (*guest_handler_t)(struct guest *guest);
> +
> +/* EL1 (Guest-internal) Exception Vector */
> +enum guest_el1_vector {
> + GUEST_EL1_SYNC,
> + GUEST_EL1_IRQ,
> + GUEST_EL1_FIQ,
> + GUEST_EL1_SERROR,
> + GUEST_EL1_MAX
> +};
> +
There's a similar vector enum in lib/arm64/asm/processor.h; is there a specific
need to have a separate guest_el1 version?
> +/*
> + * Guest EL1 Exception Frame (pushed to guest stack by asm stub)
> + * We use a simplified frame: x0-x30, elr, spsr. size = 33*8
> + */
> +struct guest_el1_regs {
> + unsigned long regs[31];
> + unsigned long elr;
> + unsigned long spsr;
> +};
What about SP?
> +
> +typedef void (*guest_el1_handler_t)(struct guest_el1_regs *regs, unsigned int esr);
> +
> +/* Exceptions from the Guest (Lower EL using AArch64) */
> +enum guest_vector {
> + GUEST_VECTOR_SYNC,
> + GUEST_VECTOR_IRQ,
> + GUEST_VECTOR_FIQ,
> + GUEST_VECTOR_SERROR,
> + GUEST_VECTOR_MAX
> +};
Same comment about vector.
> +
> +/*
> + * Guest Context Structure
> + * This will be pointed to by TPIDR_EL1 while the guest is running.
> + */
> +struct guest_context {
> + guest_el1_handler_t handlers[GUEST_EL1_MAX];
> +};
> +
> +struct guest {
> + /* 0x000: General Purpose Registers */
> + unsigned long x[31]; /* x0..x30 */
> +
> + /* 0x0F8: Execution State */
> + unsigned long elr_el2;
> + unsigned long spsr_el2;
> +
> + /* 0x108: Control Registers */
> + unsigned long hcr_el2;
> + unsigned long vttbr_el2;
> + unsigned long sctlr_el1;
> + unsigned long vbar_el1;
> + unsigned long sp_el1;
> +
> + /* 0x130: Exit Information */
> + unsigned long esr_el2;
> + unsigned long far_el2;
> + unsigned long hpfar_el2;
> + unsigned long exit_code; /* enum guest_vector */
> + unsigned long tpidr_el1;
> +
> + /* 0x158: GIC Registers */
> + unsigned long ich_vmcr_el2;
> +
> + /* 0x160: Exception Handlers */
> + guest_handler_t handlers[GUEST_VECTOR_MAX];
> + struct guest_context *guest_context;
> +
> + struct s2_mmu *s2mmu;
> +};
> +
> +/* API */
> +struct guest *guest_create(int vmid, void (*guest_func)(void), enum s2_granule granule);
> +void guest_destroy(struct guest *guest);
> +
> +/* Configuration */
> +void guest_set_vector(struct guest *guest, void *vector_table);
> +void guest_set_stack(struct guest *guest, void *stack_top);
> +void guest_install_handler(struct guest *guest, enum guest_vector v, guest_handler_t handler);
> +
> +/* Install handler for exceptions INSIDE EL1 */
> +void guest_install_el1_handler(struct guest *guest, enum guest_el1_vector v, guest_el1_handler_t handler);
> +
> +unsigned long guest_c_exception_handler(struct guest *guest, unsigned long vector_offset);
> +void guest_el1_c_handler(struct guest_el1_regs *regs, unsigned int vector);
> +
> +/* Core Run Loop */
> +void guest_run(struct guest *guest);
> +
> +#endif /* __ASSEMBLY__ */
> +#endif /* _ASMARM64_GUEST_H_ */
> diff --git a/lib/arm64/guest.c b/lib/arm64/guest.c
> new file mode 100644
> index 00000000..6c256c11
> --- /dev/null
> +++ b/lib/arm64/guest.c
> @@ -0,0 +1,197 @@
> +/*
> + * Copyright (C) 2026, Google LLC.
> + * Author: Jing Zhang <jingzhangos@google.com>
> + *
> + * SPDX-License-Identifier: LGPL-2.0-or-later
> + */
> +#include <libcflat.h>
> +#include <asm/guest.h>
> +#include <asm/io.h>
> +#include <asm/sysreg.h>
> +#include <asm/barrier.h>
> +#include <alloc_page.h>
> +#include <alloc.h>
> +
> +/* Compile-time checks to ensure Assembly macros match C Struct */
> +_Static_assert(offsetof(struct guest, x) == GUEST_X_OFFSET,
> + "GUEST_X_OFFSET mismatch");
> +_Static_assert(offsetof(struct guest, elr_el2) == GUEST_ELR_OFFSET,
> + "GUEST_ELR_OFFSET mismatch");
> +_Static_assert(offsetof(struct guest, spsr_el2) == GUEST_SPSR_OFFSET,
> + "GUEST_SPSR_OFFSET mismatch");
> +_Static_assert(offsetof(struct guest, hcr_el2) == GUEST_HCR_OFFSET,
> + "GUEST_HCR_OFFSET mismatch");
> +_Static_assert(offsetof(struct guest, vttbr_el2) == GUEST_VTTBR_OFFSET,
> + "GUEST_VTTBR_OFFSET mismatch");
> +_Static_assert(offsetof(struct guest, sctlr_el1) == GUEST_SCTLR_OFFSET,
> + "GUEST_SCTLR_OFFSET mismatch");
> +_Static_assert(offsetof(struct guest, vbar_el1) == GUEST_VBAR_OFFSET,
> + "GUEST_VBAR_OFFSET mismatch");
> +_Static_assert(offsetof(struct guest, sp_el1) == GUEST_SP_EL1_OFFSET,
> + "GUEST_SP_EL1_OFFSET mismatch");
> +_Static_assert(offsetof(struct guest, esr_el2) == GUEST_ESR_OFFSET,
> + "GUEST_ESR_OFFSET mismatch");
> +_Static_assert(offsetof(struct guest, far_el2) == GUEST_FAR_OFFSET,
> + "GUEST_FAR_OFFSET mismatch");
> +_Static_assert(offsetof(struct guest, hpfar_el2) == GUEST_HPFAR_OFFSET,
> + "GUEST_HPFAR_OFFSET mismatch");
> +_Static_assert(offsetof(struct guest, exit_code) == GUEST_EXIT_CODE_OFFSET,
> + "GUEST_EXIT_CODE_OFFSET mismatch");
> +_Static_assert(offsetof(struct guest, tpidr_el1) == GUEST_TPIDR_EL1_OFFSET,
> + "GUEST_TPIDR_EL1_OFFSET mismatch");
> +_Static_assert(offsetof(struct guest, ich_vmcr_el2) == GUEST_ICH_VMCR_EL2_OFFSET,
> + "GUEST_ICH_VMCR_EL2_OFFSET mismatch");
> +
> +/*
> + * C-Entry for Exception Handling
> + * Returns 0 to Resume Guest, 1 to Exit to Host Caller
> + */
> +unsigned long guest_c_exception_handler(struct guest *guest, unsigned long vector_offset)
> +{
> + enum guest_vector vector = (enum guest_vector)guest->exit_code;
> +
> + /* Save Trap Info */
> + guest->esr_el2 = read_sysreg(esr_el2);
> + guest->far_el2 = read_sysreg(far_el2);
> + guest->hpfar_el2 = read_sysreg(hpfar_el2);
> +
> + /* Invoke Handler if registered */
> + if (guest->handlers[vector]) {
> + if (guest->handlers[vector](guest) == GUEST_ACTION_RESUME) {
> + return 0; /* ASM stub will restore and ERET */
> + }
> + }
> +
> + /* Default: Exit to caller */
> + return 1;
> +}
> +
> +/* --- EL1 (Guest-Internal) Vector Handling --- */
> +
> +void guest_install_el1_handler(struct guest *guest, enum guest_el1_vector v, guest_el1_handler_t handler)
> +{
> + if (guest && guest->guest_context && v < GUEST_EL1_MAX)
> + guest->guest_context->handlers[v] = handler;
> +}
> +
> +void guest_el1_c_handler(struct guest_el1_regs *regs, unsigned int vector)
> +{
> + struct guest_context *ctx = (struct guest_context *)read_sysreg(tpidr_el1);
> + unsigned int esr = read_sysreg(esr_el1);
> +
> + if (ctx && vector < GUEST_EL1_MAX && ctx->handlers[vector]) {
> + ctx->handlers[vector](regs, esr);
> + } else {
> + printf("Guest: Unhandled Exception Vector %d, ESR=0x%x\n", vector, esr);
> + asm volatile("hvc #0xFFFF");
> + }
> +}
> +
> +extern void guest_el1_vectors(void);
> +
> +static struct guest *__guest_create(struct s2_mmu *s2_ctx, void *entry_point)
> +{
> + struct guest *guest = calloc(1, sizeof(struct guest));
> + struct guest_context *guest_ctx;
> + unsigned long guest_ctx_pa;
> +
> + /* Allocate the internal context table */
> + guest_ctx = (void *)alloc_page();
> + memset(guest_ctx, 0, PAGE_SIZE);
> + guest->guest_context = guest_ctx;
> +
> + guest_ctx_pa = virt_to_phys(guest_ctx);
> + if (s2_ctx)
> + s2mmu_map(s2_ctx, guest_ctx_pa, guest_ctx_pa, PAGE_SIZE, S2_MAP_RW);
> +
> + guest->tpidr_el1 = guest_ctx_pa;;
> +
> + guest->elr_el2 = (unsigned long)entry_point;
> + guest->spsr_el2 = 0x3C5; /* M=EL1h, DAIF=Masked */
> + guest->hcr_el2 = HCR_GUEST_FLAGS;
> +
> + if (s2_ctx) {
> + guest->vttbr_el2 = virt_to_phys(s2_ctx->pgd);
> + guest->vttbr_el2 |= ((unsigned long)s2_ctx->vmid << 48);
> + }
> +
> + guest->sctlr_el1 = read_sysreg(sctlr_el1);
> + guest->sctlr_el1 |= SCTLR_EL1_C | SCTLR_EL1_I | SCTLR_EL1_M;
> +
> + guest->ich_vmcr_el2 = read_sysreg(ich_vmcr_el2);
> + guest->ich_vmcr_el2 |= (0xFFUL << ICH_VMCR_PMR_SHIFT) | (1UL << ICH_VMCR_ENG1_SHIFT);
> +
> + guest->vbar_el1 = (unsigned long)guest_el1_vectors;
> + guest->s2mmu = s2_ctx;
> +
> + return guest;
> +}
> +
> +struct guest *guest_create(int vmid, void (*guest_func)(void), enum s2_granule granule)
> +{
> + unsigned long guest_pa, code_base, stack_pa;
> + unsigned long *stack_page;
> + struct guest *guest;
> + struct s2_mmu *ctx;
> +
> + ctx = s2mmu_init(vmid, granule, true);
> + /*
> + * Map the Host's code segment Identity Mapped (IPA=PA).
> + * To be safe, we map a large chunk (e.g., 2MB) around the function
> + * to capture any helper functions the compiler might generate calls to.
> + */
> + guest_pa = virt_to_phys((void *)guest_func);
> + code_base = guest_pa & ~(SZ_2M - 1);
> + s2mmu_map(ctx, code_base, code_base, SZ_2M, S2_MAP_RW);
> +
> + /*
> + * Map Stack
> + * Allocate 16 pages (64K) in Host, get its PA, and map it for Guest.
> + */
> + stack_page = alloc_pages(get_order(GUEST_STACK_SIZE >> PAGE_SHIFT));
> + stack_pa = virt_to_phys(stack_page);
> + /* Identity Map it (IPA = PA) */
> + s2mmu_map(ctx, stack_pa, stack_pa, GUEST_STACK_SIZE, S2_MAP_RW);
> +
> + s2mmu_enable(ctx);
> +
> + /* Create Guest */
> + /* Entry point is the PA of the function (Identity Mapped) */
> + guest = __guest_create(ctx, (void *)guest_pa);
> +
> + /*
> + * Setup Guest Stack Pointer
> + * Must match where we mapped the stack + Offset
> + */
> + guest_set_stack(guest, (void *)(stack_pa + GUEST_STACK_SIZE));
> +
> + /* Map UART identity mapped, printf() available to guest */
> + s2mmu_map(ctx, 0x09000000, 0x09000000, PAGE_SIZE, S2_MAP_DEVICE);
> +
> + return guest;
> +}
> +
> +void guest_destroy(struct guest *guest)
> +{
> + s2mmu_disable(guest->s2mmu);
> + s2mmu_destroy(guest->s2mmu);
> + if (guest->guest_context)
> + free_page(guest->guest_context);
> + free(guest);
> +}
> +
> +void guest_set_vector(struct guest *guest, void *vector_table)
> +{
> + guest->vbar_el1 = (unsigned long)vector_table;
> +}
> +
> +void guest_set_stack(struct guest *guest, void *stack_top)
> +{
> + guest->sp_el1 = (unsigned long)stack_top;
> +}
> +
> +void guest_install_handler(struct guest *guest, enum guest_vector v, guest_handler_t handler)
> +{
> + if (v < GUEST_VECTOR_MAX)
> + guest->handlers[v] = handler;
> +}
> diff --git a/lib/arm64/guest_arch.S b/lib/arm64/guest_arch.S
> new file mode 100644
> index 00000000..cb7074d7
> --- /dev/null
> +++ b/lib/arm64/guest_arch.S
> @@ -0,0 +1,263 @@
> +/*
> + * Copyright (C) 2026, Google LLC.
> + * Author: Jing Zhang <jingzhangos@google.com>
> + *
> + * SPDX-License-Identifier: LGPL-2.0-or-later
> + */
> +#define __ASSEMBLY__
> +#include <asm/guest.h>
> +
> +.global guest_run
> +guest_run:
> + /* x0 = struct guest pointer */
> +
> + /* Save Host Callee-Saved Regs */
> + stp x29, x30, [sp, #-16]!
> + stp x27, x28, [sp, #-16]!
> + stp x25, x26, [sp, #-16]!
> + stp x23, x24, [sp, #-16]!
> + stp x21, x22, [sp, #-16]!
> + stp x19, x20, [sp, #-16]!
> +
> + /* Cache Guest Pointer in TPIDR_EL2 */
> + msr tpidr_el2, x0
> +
> + /* Configure ICC_SRE_EL2 to allow EL1 access to SysRegs */
> + /* Bit 3 (Enable) = 1, Bit 0 (SRE) = 1 */
> + mrs x1, icc_sre_el2
> + orr x1, x1, #1
> + orr x1, x1, #(1 << 3)
> + msr icc_sre_el2, x1
> + isb
> +
> + /* Enable virtual CPU interface */
> + mrs x1, ich_hcr_el2
> + orr x1, x1, #1
> + msr ich_hcr_el2, x1
> +
> + /* Load Guest System Registers */
> + ldr x1, [x0, #GUEST_ELR_OFFSET]
> + msr elr_el2, x1
> + ldr x1, [x0, #GUEST_SPSR_OFFSET]
> + msr spsr_el2, x1
> + ldr x1, [x0, #GUEST_HCR_OFFSET]
> + msr hcr_el2, x1
> + ldr x1, [x0, #GUEST_VTTBR_OFFSET]
> + msr vttbr_el2, x1
> + ldr x1, [x0, #GUEST_SCTLR_OFFSET]
> + msr S3_5_c1_c0_0, x1
> + ldr x1, [x0, #GUEST_VBAR_OFFSET]
> + msr S3_5_c12_c0_0, x1
Need to add these registers to the sysreg.h file, so don't need the raw encoding.
> + ldr x1, [x0, #GUEST_SP_EL1_OFFSET]
> + msr sp_el1, x1
> + ldr x1, [x0, #GUEST_TPIDR_EL1_OFFSET]
> + msr tpidr_el1, x1
> + ldr x1, [x0, #GUEST_ICH_VMCR_EL2_OFFSET]
> + msr ich_vmcr_el2, x1
> +
> + /* Load Guest GPRs */
> + ldp x1, x2, [x0, #8]
> + ldp x3, x4, [x0, #24]
> + ldp x5, x6, [x0, #40]
> + ldp x7, x8, [x0, #56]
> + ldp x9, x10, [x0, #72]
> + ldp x11, x12, [x0, #88]
> + ldp x13, x14, [x0, #104]
> + ldp x15, x16, [x0, #120]
> + ldp x17, x18, [x0, #136]
> + ldp x19, x20, [x0, #152]
> + ldp x21, x22, [x0, #168]
> + ldp x23, x24, [x0, #184]
> + ldp x25, x26, [x0, #200]
> + ldp x27, x28, [x0, #216]
> + ldp x29, x30, [x0, #232]
> + ldr x0, [x0, #0]
> +
> + /* Install Trap Handler */
> + adrp x29, guest_hyp_vectors
> + add x29, x29, :lo12:guest_hyp_vectors
> + msr vbar_el2, x29
> +
> + /* Restore x29 from struct (via tpidr_el2) */
> + mrs x29, tpidr_el2
> + ldr x29, [x29, #232]
> +
> + isb
> + eret
> +
> + .align 11
> +guest_hyp_vectors:
> + .skip 0x400
> +
> +guest_exit_sync:
> + stp x0, x1, [sp, #-16]!
> + mrs x0, tpidr_el2
> + mov x1, #0
> + str x1, [x0, #GUEST_EXIT_CODE_OFFSET]
> + b guest_common_exit
> +
> + .balign 0x80
> +
> +guest_exit_irq:
> + stp x0, x1, [sp, #-16]!
> + mrs x0, tpidr_el2
> + mov x1, #1
> + str x1, [x0, #GUEST_EXIT_CODE_OFFSET]
> + b guest_common_exit
> +
> +guest_common_exit:
> + stp x2, x3, [x0, #16]
> + stp x4, x5, [x0, #32]
> + stp x6, x7, [x0, #48]
> + stp x8, x9, [x0, #64]
> + stp x10, x11, [x0, #80]
> + stp x12, x13, [x0, #96]
> + stp x14, x15, [x0, #112]
> + stp x16, x17, [x0, #128]
> + stp x18, x19, [x0, #144]
> + stp x20, x21, [x0, #160]
> + stp x22, x23, [x0, #176]
> + stp x24, x25, [x0, #192]
> + stp x26, x27, [x0, #208]
> + stp x28, x29, [x0, #224]
> + str x30, [x0, #240]
> +
> + ldp x2, x3, [sp], #16
> + stp x2, x3, [x0, #0]
> +
> + mrs x1, elr_el2
> + str x1, [x0, #GUEST_ELR_OFFSET]
> + mrs x1, spsr_el2
> + str x1, [x0, #GUEST_SPSR_OFFSET]
> + mrs x1, esr_el2
> + str x1, [x0, #GUEST_ESR_OFFSET]
> + mrs x1, far_el2
> + str x1, [x0, #GUEST_FAR_OFFSET]
> + mrs x1, hpfar_el2
> + str x1, [x0, #GUEST_HPFAR_OFFSET]
> + mrs x1, sp_el1
> + str x1, [x0, #GUEST_SP_EL1_OFFSET]
Missing VBAR_EL1?
> + mrs x1, ich_vmcr_el2
> + str x1, [x0, #GUEST_ICH_VMCR_EL2_OFFSET]
> +
> + /* x29 contains vector offset from entry */
> + mov x1, x29
> + bl guest_c_exception_handler
> + cbz x0, guest_resume_guest
> +
> + /* EXIT */
> + /* Restore Host Callee-Saved Regs */
> + ldp x19, x20, [sp], #16
> + ldp x21, x22, [sp], #16
> + ldp x23, x24, [sp], #16
> + ldp x25, x26, [sp], #16
> + ldp x27, x28, [sp], #16
> + ldp x29, x30, [sp], #16
> + ret
> +
> + /* RESUME */
> +guest_resume_guest:
> + mrs x0, tpidr_el2
> + ldr x1, [x0, #GUEST_ELR_OFFSET]
> + msr elr_el2, x1
> + ldr x1, [x0, #GUEST_SPSR_OFFSET]
> + msr spsr_el2, x1
> + ldr x1, [x0, #GUEST_SP_EL1_OFFSET]
> + msr sp_el1, x1
> +
> + ldp x1, x2, [x0, #8]
> + ldp x3, x4, [x0, #24]
> + ldp x5, x6, [x0, #40]
> + ldp x7, x8, [x0, #56]
> + ldp x9, x10, [x0, #72]
> + ldp x11, x12, [x0, #88]
> + ldp x13, x14, [x0, #104]
> + ldp x15, x16, [x0, #120]
> + ldp x17, x18, [x0, #136]
> + ldp x19, x20, [x0, #152]
> + ldp x21, x22, [x0, #168]
> + ldp x23, x24, [x0, #184]
> + ldp x25, x26, [x0, #200]
> + ldp x27, x28, [x0, #216]
> + ldp x29, x30, [x0, #232]
> + ldr x0, [x0, #0]
> + eret
> +
> +/* EL1 Vector Table */
> +.align 11
> +.global guest_el1_vectors
> +guest_el1_vectors:
> + /* Sync (0x000) */
> + .skip 0x200
> + /* Sync (0x200) */
> + stp x29, x30, [sp, #-16]!
> + mov x29, #0
> + b guest_el1_common
> + .skip 0x80 - 12
> + /* IRQ (0x280) */
> + stp x29, x30, [sp, #-16]!
> + mov x29, #1
> + b guest_el1_common
> + .skip 0x80 - 12
> + /* FIQ (0x300) */
> + stp x29, x30, [sp, #-16]!
> + mov x29, #2
> + b guest_el1_common
> + .skip 0x80 - 12
> + /* SError (0x380) */
> + stp x29, x30, [sp, #-16]!
> + mov x29, #3
> + b guest_el1_common
> + .skip 0x400
> +
> +guest_el1_common:
> + sub sp, sp, #264
> + stp x0, x1, [sp, #0]
> + stp x2, x3, [sp, #16]
> + stp x4, x5, [sp, #32]
> + stp x6, x7, [sp, #48]
> + stp x8, x9, [sp, #64]
> + stp x10, x11, [sp, #80]
> + stp x12, x13, [sp, #96]
> + stp x14, x15, [sp, #112]
> + stp x16, x17, [sp, #128]
> + stp x18, x19, [sp, #144]
> + stp x20, x21, [sp, #160]
> + stp x22, x23, [sp, #176]
> + stp x24, x25, [sp, #192]
> + stp x26, x27, [sp, #208]
> + stp x28, x30, [sp, #224]
> +
> + mrs x0, elr_el1
> + str x0, [sp, #248]
> + mrs x0, spsr_el1
> + str x0, [sp, #256]
> +
> + mov x0, sp
> + mov x1, x29
> + bl guest_el1_c_handler
> +
> + ldr x0, [sp, #248]
> + msr elr_el1, x0
> + ldr x0, [sp, #256]
> + msr spsr_el1, x0
> +
> + ldp x0, x1, [sp, #0]
> + ldp x2, x3, [sp, #16]
> + ldp x4, x5, [sp, #32]
> + ldp x6, x7, [sp, #48]
> + ldp x8, x9, [sp, #64]
> + ldp x10, x11, [sp, #80]
> + ldp x12, x13, [sp, #96]
> + ldp x14, x15, [sp, #112]
> + ldp x16, x17, [sp, #128]
> + ldp x18, x19, [sp, #144]
> + ldp x20, x21, [sp, #160]
> + ldp x22, x23, [sp, #176]
> + ldp x24, x25, [sp, #192]
> + ldp x26, x27, [sp, #208]
> + ldp x28, x30, [sp, #224]
> +
> + add sp, sp, #264
> + ldp x29, x30, [sp], #16
> + eret
> --
Thanks,
Joey
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [kvm-unit-tests PATCH v1 1/3] lib: arm64: Add stage2 page table management library
2026-03-16 22:43 ` [kvm-unit-tests PATCH v1 1/3] lib: arm64: Add stage2 page table management library Jing Zhang
@ 2026-03-24 15:12 ` Wei-Lin Chang
0 siblings, 0 replies; 10+ messages in thread
From: Wei-Lin Chang @ 2026-03-24 15:12 UTC (permalink / raw)
To: Jing Zhang, KVM, KVMARM
Cc: Marc Zyngier, Joey Gouly, Andrew Jones, Alexandru Elisei,
Oliver Upton
Hi,
On Mon, Mar 16, 2026 at 03:43:47PM -0700, Jing Zhang wrote:
> Tests running at EL2 (hypervisor level) often require the ability to
> manage Stage 2 translation tables to control Guest Physical Address (IPA)
> to Host Physical Address (PA) translation.
>
> Add a generic Stage 2 MMU library that provides software management of
> ARM64 Stage 2 translation tables.
>
> The library features include:
> - Support for 4K, 16K, and 64K translation granules.
> - Dynamic page table allocation using the allocator.
> - Support for 2M block mappings where applicable.
> - APIs for mapping, unmapping, enabling, and disabling the Stage 2 MMU.
> - Basic fault info reporting (ESR, FAR, HPFAR).
>
> This infrastructure is necessary for upcoming virtualization and
> hypervisor-mode tests.
>
> Signed-off-by: Jing Zhang <jingzhangos@google.com>
> ---
> arm/Makefile.arm64 | 1 +
> lib/arm64/asm/stage2_mmu.h | 74 +++++++
> lib/arm64/stage2_mmu.c | 402 +++++++++++++++++++++++++++++++++++++
> 3 files changed, 477 insertions(+)
> create mode 100644 lib/arm64/asm/stage2_mmu.h
> create mode 100644 lib/arm64/stage2_mmu.c
>
> diff --git a/arm/Makefile.arm64 b/arm/Makefile.arm64
> index a40c830d..5e50f5ba 100644
> --- a/arm/Makefile.arm64
> +++ b/arm/Makefile.arm64
> @@ -40,6 +40,7 @@ cflatobjs += lib/arm64/stack.o
> cflatobjs += lib/arm64/processor.o
> cflatobjs += lib/arm64/spinlock.o
> cflatobjs += lib/arm64/gic-v3-its.o lib/arm64/gic-v3-its-cmd.o
> +cflatobjs += lib/arm64/stage2_mmu.o
>
> ifeq ($(CONFIG_EFI),y)
> cflatobjs += lib/acpi.o
> diff --git a/lib/arm64/asm/stage2_mmu.h b/lib/arm64/asm/stage2_mmu.h
> new file mode 100644
> index 00000000..c9e931a8
> --- /dev/null
> +++ b/lib/arm64/asm/stage2_mmu.h
> @@ -0,0 +1,74 @@
> +/*
> + * Copyright (C) 2026, Google LLC.
> + * Author: Jing Zhang <jingzhangos@google.com>
> + *
> + * SPDX-License-Identifier: LGPL-2.0-or-later
> + */
> +#ifndef _ASMARM64_STAGE2_MMU_H_
> +#define _ASMARM64_STAGE2_MMU_H_
> +
> +#include <libcflat.h>
> +#include <asm/page.h>
> +#include <asm/pgtable.h>
> +
> +#define pte_is_table(pte) (pte_val(pte) & PTE_TABLE_BIT)
> +
> +/* Stage-2 Memory Attributes (MemAttr[3:0]) */
> +#define S2_MEMATTR_NORMAL (0xFUL << 2) /* Normal Memory, Outer/Inner Write-Back */
> +#define S2_MEMATTR_DEVICE (0x0UL << 2) /* Device-nGnRnE */
> +
> +#define ESR_ELx_EC_SHIFT (26)
> +#define ESR_ELx_EC_HVC64 UL(0x16)
> +#define ESR_ELx_EC_DABT_LOW UL(0x24)
nit:
This looks out of place to me — would it be better to move these definitions into guest.h?
> +
> +/* Stage-2 Access Permissions (S2AP[1:0]) */
> +#define S2AP_NONE (0UL << 6)
> +#define S2AP_RO (1UL << 6) /* Read-only */
> +#define S2AP_WO (2UL << 6) /* Write-only */
> +#define S2AP_RW (3UL << 6) /* Read-Write */
> +
> +/* Flags for mapping */
> +#define S2_MAP_RW (S2AP_RW | S2_MEMATTR_NORMAL | PTE_AF | PTE_SHARED)
> +#define S2_MAP_DEVICE (S2AP_RW | S2_MEMATTR_DEVICE | PTE_AF)
> +
> +enum s2_granule {
> + S2_PAGE_4K,
> + S2_PAGE_16K,
> + S2_PAGE_64K,
> +};
> +
> +/* Main Stage-2 MMU Structure */
> +struct s2_mmu {
> + pgd_t *pgd;
> + int vmid;
> +
> + /* Configuration */
> + enum s2_granule granule;
> + bool allow_block_mappings;
> +
> + /* Internal helpers calculated from granule & VA_BITS */
> + unsigned int page_shift;
> + unsigned int level_shift;
> + int root_level; /* 0, 1, or 2 */
> + unsigned long page_size;
> + unsigned long block_size;
> +};
> +
> +/* API */
> +/* Initialize an s2_mmu struct with specific settings */
> +struct s2_mmu *s2mmu_init(int vmid, enum s2_granule granule, bool allow_block_mappings);
> +
> +/* Management */
> +void s2mmu_destroy(struct s2_mmu *mmu);
> +void s2mmu_map(struct s2_mmu *mmu, unsigned long ipa, unsigned long pa,
> + unsigned long size, unsigned long flags);
> +void s2mmu_unmap(struct s2_mmu *mmu, unsigned long ipa, unsigned long size);
> +
> +/* Activation */
> +void s2mmu_enable(struct s2_mmu *mmu);
> +void s2mmu_disable(struct s2_mmu *mmu);
> +
> +/* Debug */
> +void s2mmu_print_fault_info(void);
> +
> +#endif /* _ASMARM64_STAGE2_MMU_H_ */
> diff --git a/lib/arm64/stage2_mmu.c b/lib/arm64/stage2_mmu.c
> new file mode 100644
> index 00000000..bfe87eac
> --- /dev/null
> +++ b/lib/arm64/stage2_mmu.c
> @@ -0,0 +1,402 @@
> +/*
> + * Copyright (C) 2026, Google LLC.
> + * Author: Jing Zhang <jingzhangos@google.com>
> + *
> + * SPDX-License-Identifier: LGPL-2.0-or-later
> + */
> +#include <libcflat.h>
> +#include <alloc.h>
> +#include <asm/stage2_mmu.h>
> +#include <asm/sysreg.h>
> +#include <asm/io.h>
> +#include <asm/barrier.h>
> +#include <alloc_page.h>
> +
> +/* VTCR_EL2 Definitions */
> +#define VTCR_SH0_INNER (3UL << 12)
> +#define VTCR_ORGN0_WBWA (1UL << 10)
> +#define VTCR_IRGN0_WBWA (1UL << 8)
> +
> +/* TG0 Encodings */
> +#define VTCR_TG0_4K (0UL << 14)
> +#define VTCR_TG0_64K (1UL << 14)
> +#define VTCR_TG0_16K (2UL << 14)
> +
> +/* Physical Address Size (PS) - Derive from VA_BITS for simplicity or max */
> +#if VA_BITS > 40
> +#define VTCR_PS_VAL (5UL << 16) /* 48-bit PA */
> +#else
> +#define VTCR_PS_VAL (2UL << 16) /* 40-bit PA */
> +#endif
> +
> +struct s2_mmu *s2mmu_init(int vmid, enum s2_granule granule, bool allow_block_mappings)
> +{
> + struct s2_mmu *mmu = calloc(1, sizeof(struct s2_mmu));
> + int order = 0;
> +
> + mmu->vmid = vmid;
> + mmu->granule = granule;
> + mmu->allow_block_mappings = allow_block_mappings;
> +
> + /* Configure shifts based on granule */
> + switch (granule) {
> + case S2_PAGE_4K:
> + mmu->page_shift = 12;
> + mmu->level_shift = 9;
> + /*
> + * Determine Root Level for 4K:
> + * VA_BITS > 39 (e.g. 48) -> Start L0
> + * VA_BITS <= 39 (e.g. 32, 36) -> Start L1
> + */
> + mmu->root_level = (VA_BITS > 39) ? 0 : 1;
> + break;
> + case S2_PAGE_16K:
> + mmu->page_shift = 14;
> + mmu->level_shift = 11;
> + /*
> + * 16K: L1 covers 47 bits. L0 not valid for 16K
> + * Start L1 for 47 bits. Start L2 for 36 bits.
> + */
> + mmu->root_level = (VA_BITS > 36) ? 1 : 2;
> + break;
> + case S2_PAGE_64K:
> + mmu->page_shift = 16;
> + mmu->level_shift = 13;
> + /* 64K: L1 covers 52 bits. L2 covers 42 bits. */
> + mmu->root_level = (VA_BITS > 42) ? 1 : 2;
> + break;
> + }
> +
> + mmu->page_size = 1UL << mmu->page_shift;
> + mmu->block_size = 1UL << (mmu->page_shift + mmu->level_shift);
> +
> + /* Alloc PGD. Use order for allocation size */
> + if (mmu->page_size > PAGE_SIZE) {
> + order = __builtin_ctz(mmu->page_size / PAGE_SIZE);
> + }
> + mmu->pgd = (pgd_t *)alloc_pages(order);
> + if (mmu->pgd) {
> + memset(mmu->pgd, 0, mmu->page_size);
> + } else {
> + free(mmu);
> + return NULL;
> + }
> +
> + return mmu;
> +}
> +
> +static unsigned long s2mmu_get_addr_mask(struct s2_mmu *mmu)
> +{
> + switch (mmu->granule) {
> + case S2_PAGE_16K:
> + return GENMASK_ULL(47, 14);
> + case S2_PAGE_64K:
> + return GENMASK_ULL(47, 16);
> + default:
> + return GENMASK_ULL(47, 12); /* 4K */
> + }
> +}
> +
> +static void s2mmu_free_tables(struct s2_mmu *mmu, pte_t *table, int level)
> +{
> + unsigned long entries = 1UL << mmu->level_shift;
> + unsigned long mask = s2mmu_get_addr_mask(mmu);
> + unsigned long i;
> +
> + /*
> + * Recurse if not leaf level
> + * Level 3 is always leaf page. Levels 0-2 can be Table or Block.
> + */
> + if (level < 3) {
> + for (i = 0; i < entries; i++) {
> + pte_t entry = table[i];
> + if ((pte_valid(entry) && pte_is_table(entry))) {
> + pte_t *next = (pte_t *)phys_to_virt(pte_val(entry) & mask);
> + s2mmu_free_tables(mmu, next, level + 1);
> + }
> + }
> + }
> +
> + free_pages(table);
> +}
> +
> +void s2mmu_destroy(struct s2_mmu *mmu)
> +{
> + if (mmu->pgd)
> + s2mmu_free_tables(mmu, (pte_t *)mmu->pgd, mmu->root_level);
> + free(mmu);
> +}
> +
> +void s2mmu_enable(struct s2_mmu *mmu)
> +{
> + unsigned long vtcr = VTCR_PS_VAL | VTCR_SH0_INNER |
> + VTCR_ORGN0_WBWA | VTCR_IRGN0_WBWA;
> + unsigned long t0sz = 64 - VA_BITS;
> + unsigned long vttbr;
> +
> + switch (mmu->granule) {
> + case S2_PAGE_4K:
> + vtcr |= VTCR_TG0_4K;
> + /* SL0 Encodings for 4K: 0=L2, 1=L1, 2=L0 */
> + if (mmu->root_level == 0)
> + vtcr |= (2UL << 6); /* Start L0 */
> + else if (mmu->root_level == 1)
> + vtcr |= (1UL << 6); /* Start L1 */
> + else
> + vtcr |= (0UL << 6); /* Start L2 */
> + break;
> + case S2_PAGE_16K:
> + vtcr |= VTCR_TG0_16K;
> + /* SL0 Encodings for 16K: 0=L3(Res), 1=L2, 2=L1, 3=L0(Res) */
> + if (mmu->root_level == 1)
> + vtcr |= (2UL << 6); /* Start L1 */
> + else
> + vtcr |= (1UL << 6); /* Start L2 */
> + break;
> + case S2_PAGE_64K:
> + vtcr |= VTCR_TG0_64K;
> + /* SL0 Encodings for 64K: 0=L3(Res), 1=L2, 2=L1, 3=L0(Res) */
> + if (mmu->root_level == 1)
> + vtcr |= (2UL << 6); /* Start L1 */
> + else
> + vtcr |= (1UL << 6); /* Start L2 */
> + break;
> + }
The (xUL << 6) pattern is repeated many times; perhaps turn it into a
macro, so that the trailing comments can be omitted as well.
> +
> + vtcr |= t0sz;
> +
> + write_sysreg(vtcr, vtcr_el2);
> + isb();
> +
> + /* Setup VTTBR */
> + vttbr = virt_to_phys(mmu->pgd);
> + vttbr |= ((unsigned long)mmu->vmid << 48);
> + write_sysreg(vttbr, vttbr_el2);
> + isb();
> +
> + asm volatile("tlbi vmalls12e1is");
> + dsb(ish);
> + isb();
I don't think you need the earlier isb's, only this last one: the
effects of changing vtcr_el2 and vttbr_el2 do not need to be observed
immediately.
> +}
> +
> +void s2mmu_disable(struct s2_mmu *mmu)
> +{
> + write_sysreg(0, vttbr_el2);
> + isb();
> +}
I see you have HCR_DC further down, but writing 0 to vttbr_el2 does not
by itself mean that Stage-2 translation gets disabled.
Thanks,
Wei-Lin Chang
> +
> +static pte_t *get_pte(struct s2_mmu *mmu, pte_t *table, unsigned long idx, bool alloc)
> +{
> + unsigned long mask = s2mmu_get_addr_mask(mmu);
> + pte_t entry = table[idx];
> + pte_t *next_table;
> + int order = 0;
> +
> + if (pte_valid(entry)) {
> + if (pte_is_table(entry))
> + return (pte_t *)phys_to_virt(pte_val(entry) & mask);
> + /* Block Entry */
> + return NULL;
> + }
> +
> + if (!alloc)
> + return NULL;
> +
> + /* Allocate table memory covering the Stage-2 Granule size */
> + if (mmu->page_size > PAGE_SIZE)
> + order = __builtin_ctz(mmu->page_size / PAGE_SIZE);
> +
> + next_table = (pte_t *)alloc_pages(order);
> + if (next_table)
> + memset(next_table, 0, mmu->page_size);
> +
> + pte_val(entry) = virt_to_phys(next_table) | PTE_TABLE_BIT | PTE_VALID;
> + WRITE_ONCE(table[idx], entry);
> +
> + return next_table;
> +}
> +
> +void s2mmu_map(struct s2_mmu *mmu, unsigned long ipa, unsigned long pa,
> + unsigned long size, unsigned long flags)
> +{
> + unsigned long level_mask, level_shift, level_size, level;
> + unsigned long start_ipa, end_ipa, idx;
> + pte_t entry, *table, *next_table;
> + bool is_block_level;
> +
> + start_ipa = ipa;
> + end_ipa = ipa + size;
> + level_mask = (1UL << mmu->level_shift) - 1;
> +
> + while (start_ipa < end_ipa) {
> + table = (pte_t *)mmu->pgd;
> +
> + /* Walk from Root to Leaf */
> + for (level = mmu->root_level; level < 3; level++) {
> + level_shift = mmu->page_shift + (3 - level) * mmu->level_shift;
> + idx = (start_ipa >> level_shift) & level_mask;
> + level_size = 1UL << level_shift;
> +
> + /*
> + * Check for Block Mapping
> + * Valid Block Levels:
> + * 4K: L1 (1G), L2 (2MB)
> + * 16K: L2 (32MB)
> + * 64K: L2 (512MB)
> + */
> + is_block_level = (level == 2) ||
> + (mmu->granule == S2_PAGE_4K && level == 1);
> +
> + if (mmu->allow_block_mappings && is_block_level) {
> + if ((start_ipa & (level_size - 1)) == 0 &&
> + (pa & (level_size - 1)) == 0 &&
> + (start_ipa + level_size) <= end_ipa) {
> + /* Map Block */
> + pte_val(entry) = (pa & ~(level_size - 1)) |
> + flags | PTE_VALID;
> + WRITE_ONCE(table[idx], entry);
> + start_ipa += level_size;
> + pa += level_size;
> + goto next_chunk; /* Continue outer loop */
> + }
> + }
> +
> + /* Move to next level */
> + next_table = get_pte(mmu, table, idx, true);
> + if (!next_table) {
> + printf("Error allocating or existing block conflict.\n");
> + return;
> + }
> + table = next_table;
> + }
> +
> + /* Leaf Level (Level 3 PTE) */
> + if (level == 3) {
> + idx = (start_ipa >> mmu->page_shift) & level_mask;
> + pte_val(entry) = (pa & ~(mmu->page_size - 1)) | flags | PTE_TYPE_PAGE;
> + WRITE_ONCE(table[idx], entry);
> + start_ipa += mmu->page_size;
> + pa += mmu->page_size;
> + }
> +
> +next_chunk:
> + continue;
> + }
> +
> + asm volatile("tlbi vmalls12e1is");
> + dsb(ish);
> + isb();
> +}
> +
> +/*
> + * Recursive helper to unmap a range within a specific table.
> + * Returns true if the table at this level is now completely empty
> + * and should be freed by the caller.
> + */
> +static bool s2mmu_unmap_level(struct s2_mmu *mmu, pte_t *table,
> + unsigned long current_ipa, int level,
> + unsigned long start_ipa, unsigned long end_ipa,
> + unsigned long mask)
> +{
> + unsigned long level_size, entry_ipa, entry_end;
> + bool child_empty, table_empty = true;
> + pte_t entry, *next_table;
> + unsigned int level_shift;
> + unsigned long i;
> +
> + /* Calculate shift and size for this level */
> + if (level == 3) {
> + level_shift = mmu->page_shift;
> + } else {
> + level_shift = mmu->page_shift + (3 - level) * mmu->level_shift;
> + }
> + level_size = 1UL << level_shift;
> +
> + /* Iterate over all entries in this table */
> + for (i = 0; i < (1UL << mmu->level_shift); i++) {
> + entry = table[i];
> + entry_ipa = current_ipa + (i * level_size);
> + entry_end = entry_ipa + level_size;
> +
> + /* Skip entries completely outside our target range */
> + if (entry_end <= start_ipa || entry_ipa >= end_ipa) {
> + if (pte_valid(entry))
> + table_empty = false;
> + continue;
> + }
> +
> + /*
> + * If the entry is fully covered by the unmap range,
> + * we can clear it (leaf) or recurse and free (table).
> + */
> + if (entry_ipa >= start_ipa && entry_end <= end_ipa) {
> + if (pte_valid(entry)) {
> + if (pte_is_table(entry) && level < 3) {
> + /* Recurse to free children first */
> + next_table = (pte_t *)phys_to_virt(pte_val(entry) & mask);
> + s2mmu_free_tables(mmu, next_table, level + 1);
> + }
> + /* Invalidate the entry */
> + WRITE_ONCE(table[i], __pte(0));
> + }
> + continue;
> + }
> +
> + /*
> + * Partial overlap: This must be a table (split required).
> + * If it's a Block, we can't split easily in this context
> + * without complex logic, so we generally skip or fail.
> + * Assuming standard breakdown: recurse into the table.
> + */
> + if (pte_valid(entry) && pte_is_table(entry) && level < 3) {
> + next_table = (pte_t *)phys_to_virt(pte_val(entry) & mask);
> + child_empty = s2mmu_unmap_level(mmu, next_table, entry_ipa, level + 1,
> + start_ipa, end_ipa, mask);
> +
> + if (child_empty) {
> + free_pages(next_table);
> + WRITE_ONCE(table[i], __pte(0));
> + } else {
> + table_empty = false;
> + }
> + } else if (pte_valid(entry)) {
> + /*
> + * Overlap on a leaf/block entry that extends
> + * beyond the unmap range. We cannot simply clear it.
> + */
> + table_empty = false;
> + }
> + }
> +
> + return table_empty;
> +}
> +
> +void s2mmu_unmap(struct s2_mmu *mmu, unsigned long ipa, unsigned long size)
> +{
> + unsigned long end_ipa = ipa + size;
> + unsigned long mask = s2mmu_get_addr_mask(mmu);
> +
> + if (!mmu->pgd)
> + return;
> +
> + /*
> + * Start recursion from the root level.
> + * We rarely free the PGD itself unless destroying the MMU,
> + * so we ignore the return value here.
> + */
> + s2mmu_unmap_level(mmu, (pte_t *)mmu->pgd, 0, mmu->root_level,
> + ipa, end_ipa, mask);
> +
> + /* Ensure TLB invalidation occurs after page table updates */
> + asm volatile("tlbi vmalls12e1is");
> + dsb(ish);
> + isb();
> +}
> +
> +void s2mmu_print_fault_info(void)
> +{
> + unsigned long esr = read_sysreg(esr_el2);
> + unsigned long far = read_sysreg(far_el2);
> + unsigned long hpfar = read_sysreg(hpfar_el2);
> + printf("Stage-2 Fault Info: ESR=0x%lx FAR=0x%lx HPFAR=0x%lx\n", esr, far, hpfar);
> +}
> --
> 2.53.0.851.ga537e3e6e9-goog
>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [kvm-unit-tests PATCH v1 2/3] lib: arm64: Add bare-metal guest execution framework
2026-03-16 22:43 ` [kvm-unit-tests PATCH v1 2/3] lib: arm64: Add bare-metal guest execution framework Jing Zhang
` (2 preceding siblings ...)
2026-03-24 15:04 ` Joey Gouly
@ 2026-03-24 15:44 ` Wei-Lin Chang
3 siblings, 0 replies; 10+ messages in thread
From: Wei-Lin Chang @ 2026-03-24 15:44 UTC (permalink / raw)
To: Jing Zhang, KVM, KVMARM
Cc: Marc Zyngier, Joey Gouly, Andrew Jones, Alexandru Elisei,
Oliver Upton
Hi,
On Mon, Mar 16, 2026 at 03:43:48PM -0700, Jing Zhang wrote:
> To test advanced KVM features such as nested virtualization (NV) and
> GICv4 direct interrupt injection, kvm-unit-tests needs the ability to
> act as an L1 hypervisor running at EL2 and manage its own L2 guests.
>
> Introduce a lightweight guest management library that provides the
> infrastructure to create, configure, and execute nested guests.
>
> This framework includes:
> - Guest lifecycle management: `guest_create()` and `guest_destroy()`
> APIs to allocate guest context and setup Stage-2 identity mappings
> for code and stack using the s2mmu library.
> - Context switching: The `guest_run()` assembly routine handles
> saving the host (L1) callee-saved registers and loading the guest
> (L2) GPRs and EL1 system registers.
> - VM-Exit handling: Installs an EL2 trap handler (`guest_hyp_vectors`)
> to intercept guest exits and route them to `guest_c_exception_handler`
> to determine whether to return to the host test logic or resume.
> - Guest-internal exceptions: Provides `guest_el1_vectors` to catch
> Sync, IRQ, FIQ, and SError exceptions occurring entirely within the
> guest (EL1) without trapping to the host.
>
> Signed-off-by: Jing Zhang <jingzhangos@google.com>
Thank you for the effort.
To me this feels like too much being added in one go. I think at least
the guest-internal exception handling, and the use of Stage-2 (starting
from no Stage-2 for L2), could be split into their own separate commits.
> ---
> arm/Makefile.arm64 | 2 +
> lib/arm64/asm/guest.h | 156 ++++++++++++++++++++++++
> lib/arm64/guest.c | 197 ++++++++++++++++++++++++++++++
> lib/arm64/guest_arch.S | 263 +++++++++++++++++++++++++++++++++++++++++
> 4 files changed, 618 insertions(+)
> create mode 100644 lib/arm64/asm/guest.h
> create mode 100644 lib/arm64/guest.c
> create mode 100644 lib/arm64/guest_arch.S
>
> diff --git a/arm/Makefile.arm64 b/arm/Makefile.arm64
> index 5e50f5ba..9026fd71 100644
> --- a/arm/Makefile.arm64
> +++ b/arm/Makefile.arm64
> @@ -41,6 +41,8 @@ cflatobjs += lib/arm64/processor.o
> cflatobjs += lib/arm64/spinlock.o
> cflatobjs += lib/arm64/gic-v3-its.o lib/arm64/gic-v3-its-cmd.o
> cflatobjs += lib/arm64/stage2_mmu.o
> +cflatobjs += lib/arm64/guest.o
> +cflatobjs += lib/arm64/guest_arch.o
>
> ifeq ($(CONFIG_EFI),y)
> cflatobjs += lib/acpi.o
> diff --git a/lib/arm64/asm/guest.h b/lib/arm64/asm/guest.h
> new file mode 100644
> index 00000000..1d70873d
> --- /dev/null
> +++ b/lib/arm64/asm/guest.h
> @@ -0,0 +1,156 @@
> +/*
> + * Copyright (C) 2026, Google LLC.
> + * Author: Jing Zhang <jingzhangos@google.com>
> + *
> + * SPDX-License-Identifier: LGPL-2.0-or-later
> + */
> +#ifndef _ASMARM64_GUEST_H_
> +#define _ASMARM64_GUEST_H_
> +
> +/* Offsets for assembly (Must match struct guest) */
> +#define GUEST_X_OFFSET 0
> +#define GUEST_ELR_OFFSET 248
> +#define GUEST_SPSR_OFFSET 256
> +#define GUEST_HCR_OFFSET 264
> +#define GUEST_VTTBR_OFFSET 272
> +#define GUEST_SCTLR_OFFSET 280
> +#define GUEST_VBAR_OFFSET 288
> +#define GUEST_SP_EL1_OFFSET 296
> +#define GUEST_ESR_OFFSET 304
> +#define GUEST_FAR_OFFSET 312
> +#define GUEST_HPFAR_OFFSET 320
> +#define GUEST_EXIT_CODE_OFFSET 328
> +#define GUEST_TPIDR_EL1_OFFSET 336
> +#define GUEST_ICH_VMCR_EL2_OFFSET 344
> +
> +#ifndef __ASSEMBLY__
> +
> +#include <libcflat.h>
> +#include <asm/stage2_mmu.h>
> +
> +/* HCR_EL2 Definitions */
> +#define HCR_VM (1UL << 0) /* Virtualization Enable */
> +#define HCR_FMO (1UL << 3) /* Physical FIQ Routing */
> +#define HCR_IMO (1UL << 4) /* Physical IRQ Routing */
> +#define HCR_AMO (1UL << 5) /* Physical SError Interrupt Routing */
> +#define HCR_RW (1UL << 31) /* Execution State: AArch64 */
> +#define HCR_DC (1UL << 12) /* Default Cacheable */
> +#define HCR_E2H (1UL << 34) /* EL2 Host */
> +
> +#define HCR_GUEST_FLAGS (HCR_VM | HCR_FMO | HCR_IMO | HCR_AMO | HCR_RW | \
> + HCR_DC | HCR_E2H)
> +
> +/* ICH_VMCR_EL2 bit definition */
> +#define ICH_VMCR_PMR_SHIFT 24
> +#define ICH_VMCR_PMR_MASK (0xffUL << ICH_VMCR_PMR_SHIFT)
> +#define ICH_VMCR_ENG0_SHIFT 0
> +#define ICH_VMCR_ENG0_MASK (1 << ICH_VMCR_ENG0_SHIFT)
> +#define ICH_VMCR_ENG1_SHIFT 1
> +#define ICH_VMCR_ENG1_MASK (1 << ICH_VMCR_ENG1_SHIFT)
> +
> +/* Guest stack size */
> +#define GUEST_STACK_SIZE SZ_64K
> +
> +/*
> + * Result from Handler:
> + * RESUME: Keep guest running (ERET immediately)
> + * EXIT: Return to Host C caller
> + */
> +enum guest_handler_result {
> + GUEST_ACTION_RESUME,
> + GUEST_ACTION_EXIT
> +};
> +
> +struct guest;
> +typedef enum guest_handler_result (*guest_handler_t)(struct guest *guest);
> +
> +/* EL1 (Guest-internal) Exception Vector */
> +enum guest_el1_vector {
> + GUEST_EL1_SYNC,
> + GUEST_EL1_IRQ,
> + GUEST_EL1_FIQ,
> + GUEST_EL1_SERROR,
> + GUEST_EL1_MAX
> +};
> +
> +/*
> + * Guest EL1 Exception Frame (pushed to guest stack by asm stub)
> + * We use a simplified frame: x0-x30, elr, spsr. size = 33*8
> + */
> +struct guest_el1_regs {
> + unsigned long regs[31];
> + unsigned long elr;
> + unsigned long spsr;
> +};
> +
> +typedef void (*guest_el1_handler_t)(struct guest_el1_regs *regs, unsigned int esr);
> +
> +/* Exceptions from the Guest (Lower EL using AArch64) */
> +enum guest_vector {
> + GUEST_VECTOR_SYNC,
> + GUEST_VECTOR_IRQ,
> + GUEST_VECTOR_FIQ,
> + GUEST_VECTOR_SERROR,
> + GUEST_VECTOR_MAX
> +};
> +
> +/*
> + * Guest Context Structure
> + * This will be pointed to by TPIDR_EL1 while the guest is running.
> + */
> +struct guest_context {
> + guest_el1_handler_t handlers[GUEST_EL1_MAX];
> +};
> +
> +struct guest {
> + /* 0x000: General Purpose Registers */
> + unsigned long x[31]; /* x0..x30 */
> +
> + /* 0x0F8: Execution State */
> + unsigned long elr_el2;
> + unsigned long spsr_el2;
> +
> + /* 0x108: Control Registers */
> + unsigned long hcr_el2;
> + unsigned long vttbr_el2;
> + unsigned long sctlr_el1;
> + unsigned long vbar_el1;
> + unsigned long sp_el1;
> +
> + /* 0x130: Exit Information */
> + unsigned long esr_el2;
> + unsigned long far_el2;
> + unsigned long hpfar_el2;
> + unsigned long exit_code; /* enum guest_vector */
> + unsigned long tpidr_el1;
> +
> + /* 0x158: GIC Registers */
> + unsigned long ich_vmcr_el2;
> +
> + /* 0x160: Exception Handlers */
> + guest_handler_t handlers[GUEST_VECTOR_MAX];
> + struct guest_context *guest_context;
> +
> + struct s2_mmu *s2mmu;
> +};
> +
> +/* API */
> +struct guest *guest_create(int vmid, void (*guest_func)(void), enum s2_granule granule);
> +void guest_destroy(struct guest *guest);
> +
> +/* Configuration */
> +void guest_set_vector(struct guest *guest, void *vector_table);
> +void guest_set_stack(struct guest *guest, void *stack_top);
> +void guest_install_handler(struct guest *guest, enum guest_vector v, guest_handler_t handler);
> +
> +/* Install handler for exceptions INSIDE EL1 */
> +void guest_install_el1_handler(struct guest *guest, enum guest_el1_vector v, guest_el1_handler_t handler);
> +
> +unsigned long guest_c_exception_handler(struct guest *guest, unsigned long vector_offset);
> +void guest_el1_c_handler(struct guest_el1_regs *regs, unsigned int vector);
> +
> +/* Core Run Loop */
> +void guest_run(struct guest *guest);
> +
> +#endif /* __ASSEMBLY__ */
> +#endif /* _ASMARM64_GUEST_H_ */
> diff --git a/lib/arm64/guest.c b/lib/arm64/guest.c
> new file mode 100644
> index 00000000..6c256c11
> --- /dev/null
> +++ b/lib/arm64/guest.c
> @@ -0,0 +1,197 @@
> +/*
> + * Copyright (C) 2026, Google LLC.
> + * Author: Jing Zhang <jingzhangos@google.com>
> + *
> + * SPDX-License-Identifier: LGPL-2.0-or-later
> + */
> +#include <libcflat.h>
> +#include <asm/guest.h>
> +#include <asm/io.h>
> +#include <asm/sysreg.h>
> +#include <asm/barrier.h>
> +#include <alloc_page.h>
> +#include <alloc.h>
> +
> +/* Compile-time checks to ensure Assembly macros match C Struct */
> +_Static_assert(offsetof(struct guest, x) == GUEST_X_OFFSET,
> + "GUEST_X_OFFSET mismatch");
> +_Static_assert(offsetof(struct guest, elr_el2) == GUEST_ELR_OFFSET,
> + "GUEST_ELR_OFFSET mismatch");
> +_Static_assert(offsetof(struct guest, spsr_el2) == GUEST_SPSR_OFFSET,
> + "GUEST_SPSR_OFFSET mismatch");
> +_Static_assert(offsetof(struct guest, hcr_el2) == GUEST_HCR_OFFSET,
> + "GUEST_HCR_OFFSET mismatch");
> +_Static_assert(offsetof(struct guest, vttbr_el2) == GUEST_VTTBR_OFFSET,
> + "GUEST_VTTBR_OFFSET mismatch");
> +_Static_assert(offsetof(struct guest, sctlr_el1) == GUEST_SCTLR_OFFSET,
> + "GUEST_SCTLR_OFFSET mismatch");
> +_Static_assert(offsetof(struct guest, vbar_el1) == GUEST_VBAR_OFFSET,
> + "GUEST_VBAR_OFFSET mismatch");
> +_Static_assert(offsetof(struct guest, sp_el1) == GUEST_SP_EL1_OFFSET,
> + "GUEST_SP_EL1_OFFSET mismatch");
> +_Static_assert(offsetof(struct guest, esr_el2) == GUEST_ESR_OFFSET,
> + "GUEST_ESR_OFFSET mismatch");
> +_Static_assert(offsetof(struct guest, far_el2) == GUEST_FAR_OFFSET,
> + "GUEST_FAR_OFFSET mismatch");
> +_Static_assert(offsetof(struct guest, hpfar_el2) == GUEST_HPFAR_OFFSET,
> + "GUEST_HPFAR_OFFSET mismatch");
> +_Static_assert(offsetof(struct guest, exit_code) == GUEST_EXIT_CODE_OFFSET,
> + "GUEST_EXIT_CODE_OFFSET mismatch");
> +_Static_assert(offsetof(struct guest, tpidr_el1) == GUEST_TPIDR_EL1_OFFSET,
> + "GUEST_TPIDR_EL1_OFFSET mismatch");
> +_Static_assert(offsetof(struct guest, ich_vmcr_el2) == GUEST_ICH_VMCR_EL2_OFFSET,
> + "GUEST_ICH_VMCR_EL2_OFFSET mismatch");
> +
> +/*
> + * C-Entry for Exception Handling
> + * Returns 0 to Resume Guest, 1 to Exit to Host Caller
> + */
> +unsigned long guest_c_exception_handler(struct guest *guest, unsigned long vector_offset)
> +{
> + enum guest_vector vector = (enum guest_vector)guest->exit_code;
> +
> + /* Save Trap Info */
> + guest->esr_el2 = read_sysreg(esr_el2);
> + guest->far_el2 = read_sysreg(far_el2);
> + guest->hpfar_el2 = read_sysreg(hpfar_el2);
> +
> + /* Invoke Handler if registered */
> + if (guest->handlers[vector]) {
> + if (guest->handlers[vector](guest) == GUEST_ACTION_RESUME) {
> + return 0; /* ASM stub will restore and ERET */
> + }
> + }
> +
> + /* Default: Exit to caller */
> + return 1;
> +}
> +
> +/* --- EL1 (Guest-Internal) Vector Handling --- */
> +
> +void guest_install_el1_handler(struct guest *guest, enum guest_el1_vector v, guest_el1_handler_t handler)
> +{
> + if (guest && guest->guest_context && v < GUEST_EL1_MAX)
> + guest->guest_context->handlers[v] = handler;
> +}
> +
> +void guest_el1_c_handler(struct guest_el1_regs *regs, unsigned int vector)
> +{
> + struct guest_context *ctx = (struct guest_context *)read_sysreg(tpidr_el1);
> + unsigned int esr = read_sysreg(esr_el1);
> +
> + if (ctx && vector < GUEST_EL1_MAX && ctx->handlers[vector]) {
> + ctx->handlers[vector](regs, esr);
> + } else {
> + printf("Guest: Unhandled Exception Vector %d, ESR=0x%x\n", vector, esr);
> + asm volatile("hvc #0xFFFF");
> + }
> +}
> +
> +extern void guest_el1_vectors(void);
> +
> +static struct guest *__guest_create(struct s2_mmu *s2_ctx, void *entry_point)
> +{
> + struct guest *guest = calloc(1, sizeof(struct guest));
> + struct guest_context *guest_ctx;
> + unsigned long guest_ctx_pa;
> +
> + /* Allocate the internal context table */
> + guest_ctx = (void *)alloc_page();
> + memset(guest_ctx, 0, PAGE_SIZE);
> + guest->guest_context = guest_ctx;
> +
> + guest_ctx_pa = virt_to_phys(guest_ctx);
> + if (s2_ctx)
> + s2mmu_map(s2_ctx, guest_ctx_pa, guest_ctx_pa, PAGE_SIZE, S2_MAP_RW);
> +
> + guest->tpidr_el1 = guest_ctx_pa;;
nit: extra semicolon
Thanks,
Wei-Lin Chang
> +
> + guest->elr_el2 = (unsigned long)entry_point;
> + guest->spsr_el2 = 0x3C5; /* M=EL1h, DAIF=Masked */
> + guest->hcr_el2 = HCR_GUEST_FLAGS;
> +
> + if (s2_ctx) {
> + guest->vttbr_el2 = virt_to_phys(s2_ctx->pgd);
> + guest->vttbr_el2 |= ((unsigned long)s2_ctx->vmid << 48);
> + }
> +
> + guest->sctlr_el1 = read_sysreg(sctlr_el1);
> + guest->sctlr_el1 |= SCTLR_EL1_C | SCTLR_EL1_I | SCTLR_EL1_M;
> +
> + guest->ich_vmcr_el2 = read_sysreg(ich_vmcr_el2);
> + guest->ich_vmcr_el2 |= (0xFFUL << ICH_VMCR_PMR_SHIFT) | (1UL << ICH_VMCR_ENG1_SHIFT);
> +
> + guest->vbar_el1 = (unsigned long)guest_el1_vectors;
> + guest->s2mmu = s2_ctx;
> +
> + return guest;
> +}
> +
> +struct guest *guest_create(int vmid, void (*guest_func)(void), enum s2_granule granule)
> +{
> + unsigned long guest_pa, code_base, stack_pa;
> + unsigned long *stack_page;
> + struct guest *guest;
> + struct s2_mmu *ctx;
> +
> + ctx = s2mmu_init(vmid, granule, true);
> + /*
> + * Map the Host's code segment Identity Mapped (IPA=PA).
> + * To be safe, we map a large chunk (e.g., 2MB) around the function
> + * to capture any helper functions the compiler might generate calls to.
> + */
> + guest_pa = virt_to_phys((void *)guest_func);
> + code_base = guest_pa & ~(SZ_2M - 1);
> + s2mmu_map(ctx, code_base, code_base, SZ_2M, S2_MAP_RW);
> +
> + /*
> + * Map Stack
> + * Allocate 16 pages (64K) in Host, get its PA, and map it for Guest.
> + */
> + stack_page = alloc_pages(get_order(GUEST_STACK_SIZE >> PAGE_SHIFT));
> + stack_pa = virt_to_phys(stack_page);
> + /* Identity Map it (IPA = PA) */
> + s2mmu_map(ctx, stack_pa, stack_pa, GUEST_STACK_SIZE, S2_MAP_RW);
> +
> + s2mmu_enable(ctx);
> +
> + /* Create Guest */
> + /* Entry point is the PA of the function (Identity Mapped) */
> + guest = __guest_create(ctx, (void *)guest_pa);
> +
> + /*
> + * Setup Guest Stack Pointer
> + * Must match where we mapped the stack + Offset
> + */
> + guest_set_stack(guest, (void *)(stack_pa + GUEST_STACK_SIZE));
> +
> + /* Map UART identity mapped, printf() available to guest */
> + s2mmu_map(ctx, 0x09000000, 0x09000000, PAGE_SIZE, S2_MAP_DEVICE);
> +
> + return guest;
> +}
> +
> +void guest_destroy(struct guest *guest)
> +{
> + s2mmu_disable(guest->s2mmu);
> + s2mmu_destroy(guest->s2mmu);
> + if (guest->guest_context)
> + free_page(guest->guest_context);
> + free(guest);
> +}
> +
> +void guest_set_vector(struct guest *guest, void *vector_table)
> +{
> + guest->vbar_el1 = (unsigned long)vector_table;
> +}
> +
> +void guest_set_stack(struct guest *guest, void *stack_top)
> +{
> + guest->sp_el1 = (unsigned long)stack_top;
> +}
> +
> +void guest_install_handler(struct guest *guest, enum guest_vector v, guest_handler_t handler)
> +{
> + if (v < GUEST_VECTOR_MAX)
> + guest->handlers[v] = handler;
> +}
> diff --git a/lib/arm64/guest_arch.S b/lib/arm64/guest_arch.S
> new file mode 100644
> index 00000000..cb7074d7
> --- /dev/null
> +++ b/lib/arm64/guest_arch.S
> @@ -0,0 +1,263 @@
> +/*
> + * Copyright (C) 2026, Google LLC.
> + * Author: Jing Zhang <jingzhangos@google.com>
> + *
> + * SPDX-License-Identifier: LGPL-2.0-or-later
> + */
> +#define __ASSEMBLY__
> +#include <asm/guest.h>
> +
> +/*
> + * guest_run(struct guest *guest) - enter the guest and run it until the
> + * C exit handler requests a return to the host.
> + *
> + * Saves the host callee-saved registers on the host stack, caches the
> + * guest pointer in TPIDR_EL2, loads the guest system registers and GPRs
> + * from the struct (x0 at offset 0, xN at offset 8*N), installs the EL2
> + * trap vectors and erets into the guest.  Control returns here (via the
> + * ret in guest_common_exit) when guest_c_exception_handler returns
> + * non-zero.
> + *
> + * NOTE(review): the host's VBAR_EL2, HCR_EL2 and VTTBR_EL2 are
> + * overwritten and never saved/restored on exit — confirm the host does
> + * not depend on their previous values.
> + */
> +.global guest_run
> +guest_run:
> + /* x0 = struct guest pointer */
> +
> + /* Save Host Callee-Saved Regs */
> + stp x29, x30, [sp, #-16]!
> + stp x27, x28, [sp, #-16]!
> + stp x25, x26, [sp, #-16]!
> + stp x23, x24, [sp, #-16]!
> + stp x21, x22, [sp, #-16]!
> + stp x19, x20, [sp, #-16]!
> +
> + /* Cache Guest Pointer in TPIDR_EL2 */
> + msr tpidr_el2, x0
> +
> + /* Configure ICC_SRE_EL2 to allow EL1 access to SysRegs */
> + /* Bit 3 (Enable) = 1, Bit 0 (SRE) = 1 */
> + mrs x1, icc_sre_el2
> + orr x1, x1, #1
> + orr x1, x1, #(1 << 3)
> + msr icc_sre_el2, x1
> + isb
> +
> + /* Enable virtual CPU interface */
> + mrs x1, ich_hcr_el2
> + orr x1, x1, #1
> + msr ich_hcr_el2, x1
> +
> + /* Load Guest System Registers */
> + ldr x1, [x0, #GUEST_ELR_OFFSET]
> + msr elr_el2, x1
> + ldr x1, [x0, #GUEST_SPSR_OFFSET]
> + msr spsr_el2, x1
> + ldr x1, [x0, #GUEST_HCR_OFFSET]
> + msr hcr_el2, x1
> + ldr x1, [x0, #GUEST_VTTBR_OFFSET]
> + msr vttbr_el2, x1
> + /*
> + * S3_5_c1_c0_0 / S3_5_c12_c0_0 are the SCTLR_EL12 / VBAR_EL12
> + * encodings (op1=5) — they alias the guest's SCTLR_EL1/VBAR_EL1
> + * only when HCR_EL2.E2H is set; TODO confirm E2H configuration.
> + */
> + ldr x1, [x0, #GUEST_SCTLR_OFFSET]
> + msr S3_5_c1_c0_0, x1
> + ldr x1, [x0, #GUEST_VBAR_OFFSET]
> + msr S3_5_c12_c0_0, x1
> + ldr x1, [x0, #GUEST_SP_EL1_OFFSET]
> + msr sp_el1, x1
> + ldr x1, [x0, #GUEST_TPIDR_EL1_OFFSET]
> + msr tpidr_el1, x1
> + ldr x1, [x0, #GUEST_ICH_VMCR_EL2_OFFSET]
> + msr ich_vmcr_el2, x1
> +
> + /* Load Guest GPRs (x1..x30 first; x0 last since it holds the base) */
> + ldp x1, x2, [x0, #8]
> + ldp x3, x4, [x0, #24]
> + ldp x5, x6, [x0, #40]
> + ldp x7, x8, [x0, #56]
> + ldp x9, x10, [x0, #72]
> + ldp x11, x12, [x0, #88]
> + ldp x13, x14, [x0, #104]
> + ldp x15, x16, [x0, #120]
> + ldp x17, x18, [x0, #136]
> + ldp x19, x20, [x0, #152]
> + ldp x21, x22, [x0, #168]
> + ldp x23, x24, [x0, #184]
> + ldp x25, x26, [x0, #200]
> + ldp x27, x28, [x0, #216]
> + ldp x29, x30, [x0, #232]
> + ldr x0, [x0, #0]
> +
> + /* Install Trap Handler (clobbers x29 temporarily as scratch) */
> + adrp x29, guest_hyp_vectors
> + add x29, x29, :lo12:guest_hyp_vectors
> + msr vbar_el2, x29
> +
> + /* Restore x29 from struct (via tpidr_el2) */
> + mrs x29, tpidr_el2
> + ldr x29, [x29, #232]
> +
> + isb
> + eret
> +
> + /*
> + * EL2 exception vector table (2KB aligned).  Only the "lower EL,
> + * AArch64" Synchronous (offset 0x400) and IRQ (0x480, via the
> + * .balign below) slots carry real handlers.
> + *
> + * NOTE(review): the current-EL slots (0x000-0x3ff) are zero-filled,
> + * and the lower-EL FIQ (0x500) / SError (0x580) slots overlap the
> + * body of guest_common_exit below — any of those exceptions would
> + * land on zeroed memory or mid-routine.  Confirm they cannot occur,
> + * or pad every slot explicitly.
> + */
> + .align 11
> +guest_hyp_vectors:
> + .skip 0x400
> +
> +/* Lower-EL AArch64 synchronous exception: record exit code 0. */
> +guest_exit_sync:
> + stp x0, x1, [sp, #-16]!
> + mrs x0, tpidr_el2
> + mov x1, #0
> + str x1, [x0, #GUEST_EXIT_CODE_OFFSET]
> + b guest_common_exit
> +
> + .balign 0x80
> +
> +/* Lower-EL AArch64 IRQ: record exit code 1. */
> +guest_exit_irq:
> + stp x0, x1, [sp, #-16]!
> + mrs x0, tpidr_el2
> + mov x1, #1
> + str x1, [x0, #GUEST_EXIT_CODE_OFFSET]
> + b guest_common_exit
> +
> +/*
> + * Common exit path.  On entry x0 = struct guest (from TPIDR_EL2) and
> + * the guest's x0/x1 sit on the EL2 stack (pushed by the stub above).
> + * Saves guest GPRs and trap-related sysregs into the struct, then
> + * calls the C handler: return 0 => resume guest, non-zero => return
> + * to guest_run's caller.
> + */
> +guest_common_exit:
> + stp x2, x3, [x0, #16]
> + stp x4, x5, [x0, #32]
> + stp x6, x7, [x0, #48]
> + stp x8, x9, [x0, #64]
> + stp x10, x11, [x0, #80]
> + stp x12, x13, [x0, #96]
> + stp x14, x15, [x0, #112]
> + stp x16, x17, [x0, #128]
> + stp x18, x19, [x0, #144]
> + stp x20, x21, [x0, #160]
> + stp x22, x23, [x0, #176]
> + stp x24, x25, [x0, #192]
> + stp x26, x27, [x0, #208]
> + stp x28, x29, [x0, #224]
> + str x30, [x0, #240]
> +
> + /* Pop and save the guest's x0/x1 stashed by the entry stub. */
> + ldp x2, x3, [sp], #16
> + stp x2, x3, [x0, #0]
> +
> + mrs x1, elr_el2
> + str x1, [x0, #GUEST_ELR_OFFSET]
> + mrs x1, spsr_el2
> + str x1, [x0, #GUEST_SPSR_OFFSET]
> + mrs x1, esr_el2
> + str x1, [x0, #GUEST_ESR_OFFSET]
> + mrs x1, far_el2
> + str x1, [x0, #GUEST_FAR_OFFSET]
> + mrs x1, hpfar_el2
> + str x1, [x0, #GUEST_HPFAR_OFFSET]
> + mrs x1, sp_el1
> + str x1, [x0, #GUEST_SP_EL1_OFFSET]
> + mrs x1, ich_vmcr_el2
> + str x1, [x0, #GUEST_ICH_VMCR_EL2_OFFSET]
> +
> + /*
> + * NOTE(review): the v1 comment claimed x29 holds the vector offset,
> + * but neither exit stub writes x29 — x1 receives the guest's live
> + * x29 here.  The C handler should rely on GUEST_EXIT_CODE instead;
> + * TODO confirm and fix the second argument.
> + */
> + mov x1, x29
> + bl guest_c_exception_handler
> + cbz x0, guest_resume_guest
> +
> + /* EXIT */
> + /* Restore Host Callee-Saved Regs */
> + ldp x19, x20, [sp], #16
> + ldp x21, x22, [sp], #16
> + ldp x23, x24, [sp], #16
> + ldp x25, x26, [sp], #16
> + ldp x27, x28, [sp], #16
> + ldp x29, x30, [sp], #16
> + ret
> +
> + /* RESUME: reload guest return state and GPRs, re-enter the guest. */
> +guest_resume_guest:
> + mrs x0, tpidr_el2
> + ldr x1, [x0, #GUEST_ELR_OFFSET]
> + msr elr_el2, x1
> + ldr x1, [x0, #GUEST_SPSR_OFFSET]
> + msr spsr_el2, x1
> + ldr x1, [x0, #GUEST_SP_EL1_OFFSET]
> + msr sp_el1, x1
> +
> + ldp x1, x2, [x0, #8]
> + ldp x3, x4, [x0, #24]
> + ldp x5, x6, [x0, #40]
> + ldp x7, x8, [x0, #56]
> + ldp x9, x10, [x0, #72]
> + ldp x11, x12, [x0, #88]
> + ldp x13, x14, [x0, #104]
> + ldp x15, x16, [x0, #120]
> + ldp x17, x18, [x0, #136]
> + ldp x19, x20, [x0, #152]
> + ldp x21, x22, [x0, #168]
> + ldp x23, x24, [x0, #184]
> + ldp x25, x26, [x0, #200]
> + ldp x27, x28, [x0, #216]
> + ldp x29, x30, [x0, #232]
> + ldr x0, [x0, #0]
> + eret
> +
> +/*
> + * EL1 Vector Table (2KB aligned) for code running inside the guest.
> + * Only the "current EL with SPx" slots (0x200-0x3ff) carry handlers;
> + * each stub saves x29/x30, loads a vector id (0=sync, 1=irq, 2=fiq,
> + * 3=serror) into x29 and branches to the common path.  The
> + * `.skip 0x80 - 12` padding relies on each stub being exactly three
> + * 4-byte instructions.  The trailing .skip 0x400 leaves the lower-EL
> + * slots zero-filled — TODO confirm those exceptions cannot be taken.
> + */
> +.align 11
> +.global guest_el1_vectors
> +guest_el1_vectors:
> + /* Current EL with SP0 slots unused (0x000-0x1ff) */
> + .skip 0x200
> + /* Sync (0x200) */
> + stp x29, x30, [sp, #-16]!
> + mov x29, #0
> + b guest_el1_common
> + .skip 0x80 - 12
> + /* IRQ (0x280) */
> + stp x29, x30, [sp, #-16]!
> + mov x29, #1
> + b guest_el1_common
> + .skip 0x80 - 12
> + /* FIQ (0x300) */
> + stp x29, x30, [sp, #-16]!
> + mov x29, #2
> + b guest_el1_common
> + .skip 0x80 - 12
> + /* SError (0x380) */
> + stp x29, x30, [sp, #-16]!
> + mov x29, #3
> + b guest_el1_common
> + .skip 0x400
> +
> +/*
> + * Common EL1 handler: builds a frame holding x0-x28, x30, ELR_EL1
> + * (at #248) and SPSR_EL1 (at #256), then calls
> + * guest_el1_c_handler(frame, vector_id).
> + *
> + * NOTE(review): 264 is not a multiple of 16, so SP is misaligned
> + * after the sub; with SCTLR_EL1.SA set the following stp would
> + * fault.  Consider rounding the frame up to 272.
> + */
> +guest_el1_common:
> + sub sp, sp, #264
> + stp x0, x1, [sp, #0]
> + stp x2, x3, [sp, #16]
> + stp x4, x5, [sp, #32]
> + stp x6, x7, [sp, #48]
> + stp x8, x9, [sp, #64]
> + stp x10, x11, [sp, #80]
> + stp x12, x13, [sp, #96]
> + stp x14, x15, [sp, #112]
> + stp x16, x17, [sp, #128]
> + stp x18, x19, [sp, #144]
> + stp x20, x21, [sp, #160]
> + stp x22, x23, [sp, #176]
> + stp x24, x25, [sp, #192]
> + stp x26, x27, [sp, #208]
> + stp x28, x30, [sp, #224]
> +
> + mrs x0, elr_el1
> + str x0, [sp, #248]
> + mrs x0, spsr_el1
> + str x0, [sp, #256]
> +
> + mov x0, sp
> + mov x1, x29
> + bl guest_el1_c_handler
> +
> + /* Handler may have updated ELR/SPSR in the frame; write them back. */
> + ldr x0, [sp, #248]
> + msr elr_el1, x0
> + ldr x0, [sp, #256]
> + msr spsr_el1, x0
> +
> + ldp x0, x1, [sp, #0]
> + ldp x2, x3, [sp, #16]
> + ldp x4, x5, [sp, #32]
> + ldp x6, x7, [sp, #48]
> + ldp x8, x9, [sp, #64]
> + ldp x10, x11, [sp, #80]
> + ldp x12, x13, [sp, #96]
> + ldp x14, x15, [sp, #112]
> + ldp x16, x17, [sp, #128]
> + ldp x18, x19, [sp, #144]
> + ldp x20, x21, [sp, #160]
> + ldp x22, x23, [sp, #176]
> + ldp x24, x25, [sp, #192]
> + ldp x26, x27, [sp, #208]
> + ldp x28, x30, [sp, #224]
> +
> + add sp, sp, #264
> + ldp x29, x30, [sp], #16
> + eret
> --
> 2.53.0.851.ga537e3e6e9-goog
>
^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2026-03-24 15:44 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-03-16 22:43 [kvm-unit-tests PATCH v1 0/3] arm64: Add Stage-2 MMU and Nested Guest Framework Jing Zhang
2026-03-16 22:43 ` [kvm-unit-tests PATCH v1 1/3] lib: arm64: Add stage2 page table management library Jing Zhang
2026-03-24 15:12 ` Wei-Lin Chang
2026-03-16 22:43 ` [kvm-unit-tests PATCH v1 2/3] lib: arm64: Add bare-metal guest execution framework Jing Zhang
2026-03-17 1:46 ` Yao Yuan
2026-03-17 8:09 ` Marc Zyngier
2026-03-24 15:04 ` Joey Gouly
2026-03-24 15:44 ` Wei-Lin Chang
2026-03-16 22:43 ` [kvm-unit-tests PATCH v1 3/3] arm64: Add Stage-2 MMU demand paging test Jing Zhang
2026-03-24 11:43 ` [kvm-unit-tests PATCH v1 0/3] arm64: Add Stage-2 MMU and Nested Guest Framework Joey Gouly
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox