linux-arm-kernel.lists.infradead.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] KVM: arm64: Implement KVM_TRANSLATE ioctl for arm64
@ 2025-09-22 20:24 Priscilla Lam
  2025-09-22 23:27 ` Oliver Upton
  2025-09-23  8:03 ` Marc Zyngier
  0 siblings, 2 replies; 9+ messages in thread
From: Priscilla Lam @ 2025-09-22 20:24 UTC (permalink / raw)
  To: maz, oliver.upton, joey.gouly, suzuki.poulose, yuzenghui
  Cc: dwmw, gurugubs, christoffer.dall, graf, linux-arm-kernel, kvmarm,
	linux-kernel

There is a KVM_TRANSLATE ioctl for x86 to translate a GVA
(guest virtual address) to a GPA (guest physical address) in EL1
which is not yet implemented for arm64.

Implement KVM_TRANSLATE on arm64 for both configurations that
support and do not support VHE. The VHE path uses the AT
instruction directly while the non-VHE implementation wraps the
AT call in a hypercall to allow for its execution in EL2. Add
selftest that tests the ioctl in both configurations.

Signed-off-by: Priscilla Lam <prl@amazon.com>
---
 arch/arm64/include/asm/kvm_asm.h              |   2 +
 arch/arm64/kvm/guest.c                        |  89 ++++++++++++++-
 arch/arm64/kvm/hyp/nvhe/Makefile              |   3 +-
 arch/arm64/kvm/hyp/nvhe/hyp-main.c            |  10 ++
 arch/arm64/kvm/hyp/nvhe/translate.c           |  84 ++++++++++++++
 tools/testing/selftests/kvm/Makefile.kvm      |   1 +
 tools/testing/selftests/kvm/arm64/translate.c | 107 ++++++++++++++++++
 7 files changed, 292 insertions(+), 4 deletions(-)
 create mode 100644 arch/arm64/kvm/hyp/nvhe/translate.c
 create mode 100644 tools/testing/selftests/kvm/arm64/translate.c

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index bec227f9500a..56ecf4691650 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -87,6 +87,7 @@ enum __kvm_host_smccc_func {
 	__KVM_HOST_SMCCC_FUNC___pkvm_vcpu_load,
 	__KVM_HOST_SMCCC_FUNC___pkvm_vcpu_put,
 	__KVM_HOST_SMCCC_FUNC___pkvm_tlb_flush_vmid,
+	__KVM_HOST_SMCCC_FUNC___kvm_hyp_translate,
 };
 
 #define DECLARE_KVM_VHE_SYM(sym)	extern char sym[]
@@ -289,6 +290,7 @@ asmlinkage void __noreturn hyp_panic_bad_stack(void);
 asmlinkage void kvm_unexpected_el2_exception(void);
 struct kvm_cpu_context;
 void handle_trap(struct kvm_cpu_context *host_ctxt);
+extern u64 __kvm_hyp_translate(struct kvm_vcpu *vcpu, u64 gva);
 asmlinkage void __noreturn __kvm_host_psci_cpu_entry(bool is_cpu_on);
 void __noreturn __pkvm_init_finalise(void);
 void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc);
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 16ba5e9ac86c..180ea1df66cc 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -23,6 +23,7 @@
 #include <linux/uaccess.h>
 #include <asm/fpsimd.h>
 #include <asm/kvm.h>
+#include <asm/kvm_asm.h>
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_nested.h>
 #include <asm/sigcontext.h>
@@ -932,10 +933,92 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 	return -EINVAL;
 }
 
-int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
-				  struct kvm_translation *tr)
+static inline uint64_t par_to_ipa(uint64_t par, uint64_t va)
 {
-	return -EINVAL;
+	uint64_t offset = va & ((1ULL << PAGE_SHIFT) - 1);
+
+	return (par & GENMASK_ULL(51, 12)) | offset;
+}
+
+static int kvm_translate_vhe(struct kvm_vcpu *vcpu, struct kvm_translation *tr)
+{
+	unsigned long flags;
+	uint64_t hcr_old, hcr_new, par;
+	const uint64_t gva = tr->linear_address;
+
+	preempt_disable();
+	local_irq_save(flags);
+
+	/* Ensure we're in the expected VHE regime and enable S2 so PAR returns IPA. */
+	hcr_old = read_sysreg(hcr_el2);
+	hcr_new = hcr_old | HCR_E2H | HCR_VM;
+	hcr_new &= ~HCR_TGE;
+	write_sysreg(hcr_new, hcr_el2);
+	isb();
+
+	/* Load guest EL1 S1 context into *_EL12 (do not write into _EL1). */
+	write_sysreg_s(vcpu_read_sys_reg(vcpu, TTBR0_EL1), SYS_TTBR0_EL12);
+	write_sysreg_s(vcpu_read_sys_reg(vcpu, TTBR1_EL1), SYS_TTBR1_EL12);
+	write_sysreg_s(vcpu_read_sys_reg(vcpu, TCR_EL1), SYS_TCR_EL12);
+	write_sysreg_s(vcpu_read_sys_reg(vcpu, MAIR_EL1), SYS_MAIR_EL12);
+	write_sysreg_s(vcpu_read_sys_reg(vcpu, SCTLR_EL1), SYS_SCTLR_EL12);
+
+	/* Check address read */
+	asm volatile("at s1e1r, %0" :: "r"(gva));
+	isb();
+
+	par = read_sysreg(par_el1);
+	if (!(par & 1)) {
+		tr->valid = true;
+		tr->physical_address = par_to_ipa(par, gva);
+	}
+
+	/* Check address write */
+	asm volatile("at s1e1w, %0" :: "r"(gva));
+	isb();
+
+	par = read_sysreg(par_el1);
+
+	if (!(par & 1)) {
+		tr->valid = true;
+		tr->writeable = true;
+		tr->physical_address = par_to_ipa(par, gva);
+	}
+
+	/* Restore HCR_EL2 and exit */
+	write_sysreg(hcr_old, hcr_el2);
+	isb();
+	local_irq_restore(flags);
+	preempt_enable();
+
+	return 0;
+}
+
+static int kvm_translate_nvhe(struct kvm_vcpu *vcpu, struct kvm_translation *tr)
+{
+	u64 ret;
+
+	preempt_disable();
+	local_irq_disable();
+	ret = kvm_call_hyp_nvhe(__kvm_hyp_translate, vcpu, tr->linear_address);
+	local_irq_enable();
+	preempt_enable();
+
+	/* Unpack result: IPA in bits 63:8, valid in bit 4, writeable in bit 0 */
+	tr->physical_address = ret >> 8;
+	tr->valid = !!(ret & (1ULL << 4));
+	tr->writeable = !!(ret & 1ULL);
+	tr->usermode = 0;
+
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, struct kvm_translation *tr)
+{
+	if (has_vhe())
+		return kvm_translate_vhe(vcpu, tr);
+	else
+		return kvm_translate_nvhe(vcpu, tr);
 }
 
 /**
diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
index 0b0a68b663d4..bcbd4e5125b1 100644
--- a/arch/arm64/kvm/hyp/nvhe/Makefile
+++ b/arch/arm64/kvm/hyp/nvhe/Makefile
@@ -24,7 +24,8 @@ CFLAGS_switch.nvhe.o += -Wno-override-init
 
 hyp-obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \
 	 hyp-main.o hyp-smp.o psci-relay.o early_alloc.o page_alloc.o \
-	 cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o stacktrace.o ffa.o
+	 cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o stacktrace.o ffa.o \
+	 translate.o
 hyp-obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
 	 ../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o
 hyp-obj-$(CONFIG_LIST_HARDENED) += list_debug.o
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index 3206b2c07f82..a52cf002822c 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -573,6 +573,15 @@ static void handle___pkvm_teardown_vm(struct kvm_cpu_context *host_ctxt)
 	cpu_reg(host_ctxt, 1) = __pkvm_teardown_vm(handle);
 }
 
+static void handle___kvm_hyp_translate(struct kvm_cpu_context *host_ctxt)
+{
+	DECLARE_REG(struct kvm_vcpu *, host_vcpu, host_ctxt, 1);
+	DECLARE_REG(u64, gva, host_ctxt, 2);
+
+	host_vcpu = kern_hyp_va(host_vcpu);
+	cpu_reg(host_ctxt, 1) = __kvm_hyp_translate(host_vcpu, gva);
+}
+
 typedef void (*hcall_t)(struct kvm_cpu_context *);
 
 #define HANDLE_FUNC(x)	[__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x
@@ -612,6 +621,7 @@ static const hcall_t host_hcall[] = {
 	HANDLE_FUNC(__pkvm_vcpu_load),
 	HANDLE_FUNC(__pkvm_vcpu_put),
 	HANDLE_FUNC(__pkvm_tlb_flush_vmid),
+	HANDLE_FUNC(__kvm_hyp_translate),
 };
 
 static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
diff --git a/arch/arm64/kvm/hyp/nvhe/translate.c b/arch/arm64/kvm/hyp/nvhe/translate.c
new file mode 100644
index 000000000000..239a095a015d
--- /dev/null
+++ b/arch/arm64/kvm/hyp/nvhe/translate.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2025 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Author: Priscilla Lam <prl@amazon.com>
+ */
+
+#include <asm/sysreg.h>
+#include <hyp/sysreg-sr.h>
+#include <nvhe/mem_protect.h>
+
+static __always_inline u64 par_to_ipa(u64 par, u64 va)
+{
+	u64 offset = va & ((1ULL << PAGE_SHIFT) - 1);
+
+	return (par & GENMASK_ULL(51, 12)) | offset;
+}
+
+/**
+ * __kvm_hyp_translate - hypercall that translates a GVA to GPA when VHE is not enabled or available
+ * @vcpu: the vCPU pointer
+ * @gva: the guest virtual address
+ *
+ * This returns the result in a packed integer. The GPA if successful will be in bits 63:8, the
+ * validity in bit 4, and if the address is writeable in bit 0.
+ */
+u64 __kvm_hyp_translate(struct kvm_vcpu *vcpu, u64 gva)
+{
+	struct kvm_cpu_context *host_ctxt;
+	struct kvm_cpu_context *guest_ctxt;
+	struct kvm_s2_mmu *mmu;
+
+	u64 hcr_old = read_sysreg(hcr_el2);
+	u64 par = 0;
+	u64 gpa = 0;
+	bool valid = false;
+	bool writeable = false;
+
+	host_ctxt = host_data_ptr(host_ctxt);
+	host_ctxt->__hyp_running_vcpu = vcpu;
+	guest_ctxt = &vcpu->arch.ctxt;
+
+	__sysreg_save_state_nvhe(host_ctxt);
+	__debug_save_host_buffers_nvhe(vcpu);
+
+	dsb(nsh);
+
+	__sysreg_restore_state_nvhe(guest_ctxt);
+
+	mmu = kern_hyp_va(vcpu->arch.hw_mmu);
+	__load_stage2(mmu, kern_hyp_va(mmu->arch));
+
+	write_sysreg((hcr_old | HCR_E2H | HCR_VM) & ~HCR_TGE, hcr_el2);
+	isb();
+
+	asm volatile("at s1e1r, %0" :: "r"(gva));
+	isb();
+
+	par = read_sysreg(par_el1);
+
+	if (!(par & 1)) {
+		gpa = par_to_ipa(par, gva);
+		valid = true;
+	}
+
+	if (valid) {
+		asm volatile("at s1e1w, %0" :: "r"(gva));
+		isb();
+
+		par = read_sysreg(par_el1);
+		if (!(par & 1))
+			writeable = true;
+	}
+
+	write_sysreg(hcr_old, hcr_el2);
+	isb();
+
+	__load_host_stage2();
+	__sysreg_restore_state_nvhe(host_ctxt);
+	__debug_restore_host_buffers_nvhe(vcpu);
+	host_ctxt->__hyp_running_vcpu = NULL;
+
+	// Pack result: GPA in bits 63:8, valid in bit 4, writeable in bit 0
+	return (gpa << 8) | (valid ? (1ULL << 4) : 0) | (writeable ? 1ULL : 0);
+}
diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm
index 41b40c676d7f..894b1b888ce4 100644
--- a/tools/testing/selftests/kvm/Makefile.kvm
+++ b/tools/testing/selftests/kvm/Makefile.kvm
@@ -163,6 +163,7 @@ TEST_GEN_PROGS_arm64 += arm64/page_fault_test
 TEST_GEN_PROGS_arm64 += arm64/psci_test
 TEST_GEN_PROGS_arm64 += arm64/set_id_regs
 TEST_GEN_PROGS_arm64 += arm64/smccc_filter
+TEST_GEN_PROGS_arm64 += arm64/translate
 TEST_GEN_PROGS_arm64 += arm64/vcpu_width_config
 TEST_GEN_PROGS_arm64 += arm64/vgic_init
 TEST_GEN_PROGS_arm64 += arm64/vgic_irq
diff --git a/tools/testing/selftests/kvm/arm64/translate.c b/tools/testing/selftests/kvm/arm64/translate.c
new file mode 100644
index 000000000000..5cdc975ae52a
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/translate.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * translate: Test the KVM_TRANSLATE ioctl on AArch64 by setting up
+ * guest page table mappings and verifying that the ioctl correctly
+ * translates guest virtual addresses to guest physical addresses.
+ */
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+
+#define GUEST_TEST_GVA1		0x400000
+#define GUEST_TEST_GVA2		0x500000
+#define GUEST_UNMAPPED_GVA	0x600000
+
+/* AArch64 page table entry flags */
+#define PTE_RDONLY		(1ULL << 7)	/* AP[2] - Read-only */
+
+static void guest_code(void)
+{
+	GUEST_DONE();
+}
+
+/*
+ * Create a read-only page mapping by first creating a normal mapping
+ * and then modifying the PTE to add the read-only flag.
+ */
+static void virt_pg_map_readonly(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
+{
+	uint64_t *ptep;
+
+	/* First create a normal read-write mapping */
+	virt_pg_map(vm, vaddr, paddr);
+
+	/* Now find the PTE and modify it to be read-only */
+	ptep = virt_get_pte_hva(vm, vaddr);
+	TEST_ASSERT(ptep, "Failed to get PTE for GVA 0x%lx", vaddr);
+
+	/* Set the read-only bit in the PTE */
+	*ptep |= PTE_RDONLY;
+}
+
+int main(void)
+{
+	struct kvm_translation tr;
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+	vm_vaddr_t gva1, gva2;
+	vm_paddr_t gpa1, gpa2;
+
+	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+	/* Set up two different GVA to GPA mappings with different permissions. */
+	gva1 = GUEST_TEST_GVA1;
+	gpa1 = vm_phy_page_alloc(vm, vm->page_size, vm->memslots[MEM_REGION_TEST_DATA]);
+	printf("Allocated GPA1: 0x%lx for GVA1: 0x%lx\n", (unsigned long)gpa1, (unsigned long)gva1);
+	virt_pg_map(vm, gva1, gpa1);  /* Read-write mapping */
+
+	gva2 = GUEST_TEST_GVA2;
+	gpa2 = vm_phy_page_alloc(vm, vm->page_size, vm->memslots[MEM_REGION_TEST_DATA]);
+	printf("Allocated GPA2: 0x%lx for GVA2: 0x%lx\n", (unsigned long)gpa2, (unsigned long)gva2);
+	virt_pg_map_readonly(vm, gva2, gpa2);  /* Read-only mapping */
+
+	/*
+	 * The vCPU must be run at least once to initialize the system
+	 * registers needed for guest address translation.
+	 */
+	vcpu_run(vcpu);
+	TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+
+	/* Verify the first mapping (read-write) translates correctly. */
+	memset(&tr, 0, sizeof(tr));
+	tr.linear_address = gva1;
+	vcpu_ioctl(vcpu, KVM_TRANSLATE, &tr);
+
+	printf("RW mapping: GVA=0x%lx -> GPA=0x%llx, valid=%d, writeable=%d\n",
+	       (unsigned long)gva1, (unsigned long long)tr.physical_address,
+	       tr.valid, tr.writeable);
+	TEST_ASSERT(tr.valid, "Translation should succeed for mapped GVA");
+	TEST_ASSERT_EQ(tr.physical_address, gpa1);
+	TEST_ASSERT(tr.writeable, "Read-write GVA should be writeable");
+
+	/* Verify the second mapping (read-only) translates correctly. */
+	memset(&tr, 0, sizeof(tr));
+	tr.linear_address = gva2;
+	vcpu_ioctl(vcpu, KVM_TRANSLATE, &tr);
+
+	printf("RO mapping: GVA=0x%lx -> GPA=0x%llx, valid=%d, writeable=%d\n",
+	       (unsigned long)gva2, (unsigned long long)tr.physical_address,
+	       tr.valid, tr.writeable);
+	TEST_ASSERT(tr.valid, "Translation should succeed for mapped GVA");
+	TEST_ASSERT_EQ(tr.physical_address, gpa2);
+	TEST_ASSERT(!tr.writeable, "Read-only GVA should not be writeable");
+
+	/* Verify that an unmapped GVA is reported as invalid. */
+	memset(&tr, 0, sizeof(tr));
+	tr.linear_address = GUEST_UNMAPPED_GVA;
+	vcpu_ioctl(vcpu, KVM_TRANSLATE, &tr);
+
+	printf("Unmapped: GVA=0x%lx -> GPA=0x%llx, valid=%d, writeable=%d\n",
+	       (unsigned long)GUEST_UNMAPPED_GVA, (unsigned long long)tr.physical_address,
+	       tr.valid, tr.writeable);
+	TEST_ASSERT(!tr.valid, "Translation should fail for unmapped GVA");
+
+	kvm_vm_free(vm);
+	return 0;
+}
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH] KVM: arm64: Implement KVM_TRANSLATE ioctl for arm64
  2025-09-22 20:24 [PATCH] KVM: arm64: Implement KVM_TRANSLATE ioctl for arm64 Priscilla Lam
@ 2025-09-22 23:27 ` Oliver Upton
  2025-09-23  8:03 ` Marc Zyngier
  1 sibling, 0 replies; 9+ messages in thread
From: Oliver Upton @ 2025-09-22 23:27 UTC (permalink / raw)
  To: Priscilla Lam
  Cc: maz, joey.gouly, suzuki.poulose, yuzenghui, dwmw, gurugubs,
	christoffer.dall, graf, linux-arm-kernel, kvmarm, linux-kernel

Hi Priscilla,

On Mon, Sep 22, 2025 at 01:24:52PM -0700, Priscilla Lam wrote:
> There is a KVM_TRANSLATE ioctl for x86 to translate a GVA
> (guest virtual address) to a GPA (guest physical address) in EL1
> which is not yet implemented for arm64.
> 
> Implement KVM_TRANSLATE on arm64 for both configurations that
> support and do not support VHE. The VHE path uses the AT
> instruction directly while the non-VHE implementation wraps the
> AT call in a hypercall to allow for its execution in EL2. Add
> selftest that tests the ioctl in both configurations.
> 
> Signed-off-by: Priscilla Lam <prl@amazon.com>
> ---
>  arch/arm64/include/asm/kvm_asm.h              |   2 +
>  arch/arm64/kvm/guest.c                        |  89 ++++++++++++++-
>  arch/arm64/kvm/hyp/nvhe/Makefile              |   3 +-
>  arch/arm64/kvm/hyp/nvhe/hyp-main.c            |  10 ++
>  arch/arm64/kvm/hyp/nvhe/translate.c           |  84 ++++++++++++++
>  tools/testing/selftests/kvm/Makefile.kvm      |   1 +
>  tools/testing/selftests/kvm/arm64/translate.c | 107 ++++++++++++++++++

Please do selftests changes in a separate patch.

> +static int kvm_translate_vhe(struct kvm_vcpu *vcpu, struct kvm_translation *tr)

So arch/arm64/kvm/at.c exists for this exact purpose: walking guest page
tables. While it was previously constrained to handling NV-enabled VMs,
Marc's SEA TTW series opens up the stage-1 walker for general use.

I am not keen on adding yet another page-table walker implementation for
the sole purpose of servicing this ioctl.

https://lore.kernel.org/kvmarm/175845226586.1792635.11249464012956393475.b4-ty@kernel.org

> +{
> +	unsigned long flags;
> +	uint64_t hcr_old, hcr_new, par;
> +	const uint64_t gva = tr->linear_address;

"linear_address" is a delightful x86-ism. I'd prefer naming that was
either architecture-generic -or- an arm64-specific struct that used our
architectural terms.

> +	preempt_disable();
> +	local_irq_save(flags);
> +
> +	/* Ensure we're in the expected VHE regime and enable S2 so PAR returns IPA. */
> +	hcr_old = read_sysreg(hcr_el2);
> +	hcr_new = hcr_old | HCR_E2H | HCR_VM;
> +	hcr_new &= ~HCR_TGE;
> +	write_sysreg(hcr_new, hcr_el2);
> +	isb();

Thanks to borken hardware, this needs to go through the write_sysreg_hcr()
accessor.

> +
> +	/* Load guest EL1 S1 context into *_EL12 (do not write into _EL1). */
> +	write_sysreg_s(vcpu_read_sys_reg(vcpu, TTBR0_EL1), SYS_TTBR0_EL12);
> +	write_sysreg_s(vcpu_read_sys_reg(vcpu, TTBR1_EL1), SYS_TTBR1_EL12);
> +	write_sysreg_s(vcpu_read_sys_reg(vcpu, TCR_EL1), SYS_TCR_EL12);
> +	write_sysreg_s(vcpu_read_sys_reg(vcpu, MAIR_EL1), SYS_MAIR_EL12);
> +	write_sysreg_s(vcpu_read_sys_reg(vcpu, SCTLR_EL1), SYS_SCTLR_EL12);

KVM supports both FEAT_S1PIE and FEAT_S1POE, so this is not a complete
MMU context.

> +	/* Check address read */
> +	asm volatile("at s1e1r, %0" :: "r"(gva));
> +	isb();

The AT instruction can generate an exception, which is why __kvm_at()
exists.

And this is where reusing the existing translation infrastructure is
really important. The AT instruction *will* fail if the stage-1
translation tables are unmapped at stage-2. The only option at that
point is falling back to a software table walker that potentially faults
in the missing translation tables.

> +
> +	par = read_sysreg(par_el1);
> +	if (!(par & 1)) {
> +		tr->valid = true;
> +		tr->physical_address = par_to_ipa(par, gva);

What about permissions besides RW?

> +int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, struct kvm_translation *tr)
> +{
> +	if (has_vhe())
> +		return kvm_translate_vhe(vcpu, tr);
> +	else
> +		return kvm_translate_nvhe(vcpu, tr);
>  }

Yet another interesting consideration around this entire infrastructure
is the guest's view of the translation that the VMM will now use. KVM uses a
pseudo-TLB for the guest's VNCR page and maintains it just like a literal
TLB.

How would the guest invalidate the translation fetched by the VMM when
unmapping/remapping the VA? Doesn't the stage-1 PTW need to set the
Access flag as this amounts to a TLB fill?

Understanding what it is you're trying to accomplish would be quite
helpful. I'm concerned this trivializes some of the gory details of
participating in the guest's virtual memory.

Thanks,
Oliver


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] KVM: arm64: Implement KVM_TRANSLATE ioctl for arm64
  2025-09-22 20:24 [PATCH] KVM: arm64: Implement KVM_TRANSLATE ioctl for arm64 Priscilla Lam
  2025-09-22 23:27 ` Oliver Upton
@ 2025-09-23  8:03 ` Marc Zyngier
  2025-09-23  8:29   ` Priscilla Lam
  1 sibling, 1 reply; 9+ messages in thread
From: Marc Zyngier @ 2025-09-23  8:03 UTC (permalink / raw)
  To: Priscilla Lam
  Cc: oliver.upton, joey.gouly, suzuki.poulose, yuzenghui, dwmw,
	gurugubs, christoffer.dall, graf, linux-arm-kernel, kvmarm,
	linux-kernel

Hi Priscilla,

On Mon, 22 Sep 2025 21:24:52 +0100,
Priscilla Lam <prl@amazon.com> wrote:
> 
> There is a KVM_TRANSLATE ioctl for x86 to translate a GVA
> (guest virtual address) to a GPA (guest physical address) in EL1
> which is not yet implemented for arm64.
> 
> Implement KVM_TRANSLATE on arm64 for both configurations that
> support and do not support VHE. The VHE path uses the AT
> instruction directly while the non-VHE implementation wraps the
> AT call in a hypercall to allow for its execution in EL2. Add
> selftest that tests the ioctl in both configurations.
> 
> Signed-off-by: Priscilla Lam <prl@amazon.com>
> ---
>  arch/arm64/include/asm/kvm_asm.h              |   2 +
>  arch/arm64/kvm/guest.c                        |  89 ++++++++++++++-
>  arch/arm64/kvm/hyp/nvhe/Makefile              |   3 +-
>  arch/arm64/kvm/hyp/nvhe/hyp-main.c            |  10 ++
>  arch/arm64/kvm/hyp/nvhe/translate.c           |  84 ++++++++++++++
>  tools/testing/selftests/kvm/Makefile.kvm      |   1 +
>  tools/testing/selftests/kvm/arm64/translate.c | 107 ++++++++++++++++++
>  7 files changed, 292 insertions(+), 4 deletions(-)
>  create mode 100644 arch/arm64/kvm/hyp/nvhe/translate.c
>  create mode 100644 tools/testing/selftests/kvm/arm64/translate.c

Oliver already gave a review of some aspects of the code. In short,
you're going about walking the page tables the wrong way, both by
ignoring some of the complicated architectural features (PIE, POE),
trusting the S1 PTs to be vaguely correct, and by assuming that S1 PTs
are never swapped out.

But there is more: you are assuming that the only translation regime
we care about is EL1&0, which isn't true. This would also need to
cater for EL2 and EL2&0.

As Oliver also pointed out, all that infrastructure already exists in
the kernel, and is able to do the right thing, with full support of
the expected feature set as presented to the guest.

But at the end of the day, what do you need KVM_TRANSLATE for? This
interface is an absolute turd that is unable to represent the bare
minimum of the architecture (writable by whom? physical address in
which translation regime? what about S2 translations?), and is better
left in the "utter brain fart" category.

Finally, and assuming that you actually need something of the sort,
why do you expect that something in the kernel will be better than a
pure userspace implementation?

Thanks,

	M.

-- 
Without deviation from the norm, progress is not possible.


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: Re: [PATCH] KVM: arm64: Implement KVM_TRANSLATE ioctl for arm64
  2025-09-23  8:03 ` Marc Zyngier
@ 2025-09-23  8:29   ` Priscilla Lam
  2025-09-23  8:39     ` Alexander Graf
  2025-09-23  9:25     ` Marc Zyngier
  0 siblings, 2 replies; 9+ messages in thread
From: Priscilla Lam @ 2025-09-23  8:29 UTC (permalink / raw)
  To: maz, oliver.upton
  Cc: christoffer.dall, dwmw, graf, gurugubs, jgrall, joey.gouly,
	kvmarm, linux-arm-kernel, linux-kernel, prl, suzuki.poulose,
	yuzenghui

Hi Oliver and Marc,

Thanks for the detailed feedback.

> But at the end of the day, what do you need KVM_TRANSLATE for? This
> interface is an absolute turd that is unable to represent the bare
> minimum of the architecture (writable by whom? physical address in
> which translation regime? what about S2 translations?), and is better
> left in the "utter brain fart" category.

Regarding motivation, this patch is intended to give a userspace vmm
the ability to handle non-ISV guest faults. The Arm Arm (DDI 0487L.b,
section B3.13.6) notes that for load/store pair faults, the syndrome
may not provide the specifics of the access that faulted. In those
cases, the vmm must manually decode the instruction to emulate it. The
introduction of KVM_CAP_ARM_NISV_TO_USER
(https://lore.kernel.org/kvm/20191120164236.29359-2-maz@kernel.org/)
seems to have anticipated that flow by allowing exits to userspace on
trapped NISV instructions. What is still missing is a reliable way for
userspace to query VA->IPA translations in order to complete emulation.

> Please do selftests changes in a separate patch.

Ack, will split the kernel changes and selftests into 1/2 and 2/2.

> So arch/arm64/kvm/at.c exists for this exact purpose: walking guest page
> tables. While it was previously constrained to handling NV-enabled VMs,
> Marc's SEA TTW series opens up the stage-1 walker for general use.

Thanks for the reference, I wasn't aware of this. I'll drop the bespoke 
VHE/NVHE paths and use the shared S1 walker in v2.

> "linear_address" is a delightful x86-ism. I'd prefer naming that was
> either architecture-generic -or- an arm64-specific struct that used our
> architectural terms.

I'll switch internal naming to VA/IPA. For uAPI, I'll retain the field
for compatibility and translate internally.

> Thanks to borken hardware, this needs to go through the write_sysreg_hcr()
> accessor.

Ack, will use write_sysreg_hcr().

> KVM supports both FEAT_S1PIE and FEAT_S1POE, so this is not a complete
> MMU context.

Understood. v2 will rely on the shared walker to avoid missing S1PIE/S1POE.

> The AT instruction can generate an exception, which is why __kvm_at()
> exists.
>
> And this is where reusing the existing translation infrastructure is
> really important. The AT instruction *will* fail if the stage-1
> translation tables are unmapped at stage-2. The only option at that
> point is falling back to a software table walker that potentially faults
> in the missing translation tables.

v2 will use __kvm_at() and the fallback software walk.

> What about permissions besides RW?

I'll add support for the additional permission bits (execute and EL0) in v2.

> Yet another interesting consideration around this entire infrastructure
> is the guest's view of the translation that the VMM will now use. KVM
> uses a pseudo-TLB for the guest's VNCR page and maintains it just like a
> literal TLB.
>
> How would the guest invalidate the translation fetched by the VMM when
> unmapping/remapping the VA? Doesn't the stage-1 PTW need to set the
> Access flag as this amounts to a TLB fill?
> Understanding what it is you're trying to accomplish would be quite
> helpful. I'm concerned this trivializes some of the gory details of
> participating in the guest's virtual memory.

My intent is for this ioctl to be side-effect free, with no AF updates
or guest-visible TLB fills. I’ll send v2 as two patches with the above
changes.

Thanks,
Priscilla


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] KVM: arm64: Implement KVM_TRANSLATE ioctl for arm64
  2025-09-23  8:29   ` Priscilla Lam
@ 2025-09-23  8:39     ` Alexander Graf
  2025-09-23  9:02       ` David Woodhouse
  2025-09-23  9:25     ` Marc Zyngier
  1 sibling, 1 reply; 9+ messages in thread
From: Alexander Graf @ 2025-09-23  8:39 UTC (permalink / raw)
  To: Priscilla Lam, maz, oliver.upton
  Cc: christoffer.dall, dwmw, gurugubs, jgrall, joey.gouly, kvmarm,
	linux-arm-kernel, linux-kernel, suzuki.poulose, yuzenghui

Hi Priscilla,

On 23.09.25 10:29, Priscilla Lam wrote:
> Hi Oliver and Marc,
>
> Thanks for the detailed feedback.
>
>> But at the end of the day, what do you need KVM_TRANSLATE for? This
>> interface is an absolute turd that is unable to represent the bare
>> minimum of the architecture (writable by whom? physical address in
>> which translation regime? what about S2 translations?), and is better
>> left in the "utter brain fart" category.
> Regarding motivation, this patch is intended to give a userspace vmm
> the ability to handle non-ISV guest faults. The Arm Arm (DDI 0487L.b,
> section B3.13.6) notes that for load/store pair faults, the syndrome
> may not provide the specifics of the access that faulted. In those
> cases, the vmm must manually decode the instruction to emulate it. The
> introduction of KVM_CAP_ARM_NISV_TO_USER
> (https://lore.kernel.org/kvm/20191120164236.29359-2-maz@kernel.org/)
> seems to have anticipated that flow by allowing exits to userspace on
> trapped NISV instructions. What is still missing is a reliable way for
> userspace to query VA->IPA translations in order to complete emulation.


All modern OSs constrain themselves to only ISV enabled MMIO 
instructions. NISV is really only useful to run non hypervisor 
enlightened guests. Did you encounter a real world OS that was causing 
NISV faults? And if so, what was causing these? It's most likely a 
driver or OS bug.

Alex




Amazon Web Services Development Center Germany GmbH
Tamara-Danz-Str. 13
10243 Berlin
Geschaeftsfuehrung: Christian Schlaeger
Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B
Sitz: Berlin
Ust-ID: DE 365 538 597

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] KVM: arm64: Implement KVM_TRANSLATE ioctl for arm64
  2025-09-23  8:39     ` Alexander Graf
@ 2025-09-23  9:02       ` David Woodhouse
  2025-09-23 18:05         ` Christoffer Dall
  0 siblings, 1 reply; 9+ messages in thread
From: David Woodhouse @ 2025-09-23  9:02 UTC (permalink / raw)
  To: Alexander Graf, Priscilla Lam, maz, oliver.upton
  Cc: christoffer.dall, gurugubs, jgrall, joey.gouly, kvmarm,
	linux-arm-kernel, linux-kernel, suzuki.poulose, yuzenghui

[-- Attachment #1: Type: text/plain, Size: 1168 bytes --]

On Tue, 2025-09-23 at 10:39 +0200, Alexander Graf wrote:
> 
> All modern OSs constrain themselves to only ISV enabled MMIO 
> instructions.

Why? Such does not appear to be required by the architecture, AFAICT?

And what is "MMIO" architecturally anyway? Using LDP is actually fine
on MMIO, if a PCI BAR is passed through to a guest (and assuming there
are no ordering dependencies on the loads). It's the fact that it's
missing from stage2 page tables and causes a guest exit which is the
problem.

Other than lying to a compiler and *pretending* there are ordering
constraints, or explicitly using inline asm, how would a guest OS
*prevent* the compiler from using problematic instructions?

>  NISV is really only useful to run non hypervisor 
> enlightened guests. Did you encounter a real world OS that was causing 
> NISV faults? And if so, what was causing these? It's most likely a 
> driver or OS bug.

The Windows CRB TPM driver uses LDP. Even if it *were* forbidden by the
architecture, the track record of getting Microsoft to fix bugs *even*
when they are egregious violations of both specifications and common
sense is... poor.

[-- Attachment #2: smime.p7s --]
[-- Type: application/pkcs7-signature, Size: 5069 bytes --]

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] KVM: arm64: Implement KVM_TRANSLATE ioctl for arm64
  2025-09-23  8:29   ` Priscilla Lam
  2025-09-23  8:39     ` Alexander Graf
@ 2025-09-23  9:25     ` Marc Zyngier
  2025-09-25  5:21       ` Priscilla Lam
  1 sibling, 1 reply; 9+ messages in thread
From: Marc Zyngier @ 2025-09-23  9:25 UTC (permalink / raw)
  To: Priscilla Lam
  Cc: oliver.upton, christoffer.dall, dwmw, graf, gurugubs, jgrall,
	joey.gouly, kvmarm, linux-arm-kernel, linux-kernel,
	suzuki.poulose, yuzenghui

On Tue, 23 Sep 2025 09:29:55 +0100,
Priscilla Lam <prl@amazon.com> wrote:
> 
> Hi Oliver and Marc,
> 
> Thanks for the detailed feedback.
> 
> > But at the end of the day, what do you need KVM_TRANSLATE for? This
> > interface is an absolute turd that is unable to represent the bare
> > minimum of the architecture (writable by whom? physical address in
> > which translation regime? what about S2 translations?), and is better
> > left in the "utter brain fart" category.
> 
> Regarding motivation, this patch is intended to give a userspace vmm
> the ability to handle non-ISV guest faults. The Arm Arm (DDI 0487L.b,
> section B3.13.6) notes that for load/store pair faults, the syndrome
> may not provide the specifics of the access that faulted. In those
> cases, the vmm must manually decode the instruction to emulate it. The
> introduction of KVM_CAP_ARM_NISV_TO_USER
> (https://lore.kernel.org/kvm/20191120164236.29359-2-maz@kernel.org/)
> seems to have anticipated that flow by allowing exits to userspace on
> trapped NISV instructions. What is still missing is a reliable way for
> userspace to query VA->IPA translations in order to complete emulation.

A guest doing this is a sure indication that it is completely broken,
and will fail on actual HW, because it clearly ignores small
insignificant details such as *ordering*.

My other question still remains: why can't you perform this page table
walk in userspace? It is actually much safer to do so because you can
stop other vcpus while inspecting the PTs, and avoid a vcpu playing
tricks behind your back -- something the in-kernel PTW doesn't try to
avoid.

Thanks,

	M.

-- 
Without deviation from the norm, progress is not possible.


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] KVM: arm64: Implement KVM_TRANSLATE ioctl for arm64
  2025-09-23  9:02       ` David Woodhouse
@ 2025-09-23 18:05         ` Christoffer Dall
  0 siblings, 0 replies; 9+ messages in thread
From: Christoffer Dall @ 2025-09-23 18:05 UTC (permalink / raw)
  To: David Woodhouse
  Cc: Alexander Graf, Priscilla Lam, Marc Zyngier,
	oliver.upton@linux.dev, gurugubs@amazon.com, jgrall@amazon.co.uk,
	Joey Gouly, kvmarm@lists.linux.dev,
	linux-arm-kernel@lists.infradead.org,
	linux-kernel@vger.kernel.org, Suzuki Poulose,
	yuzenghui@huawei.com



> On 23 Sep 2025, at 11.02, David Woodhouse <dwmw2@infradead.org> wrote:
> 
> On Tue, 2025-09-23 at 10:39 +0200, Alexander Graf wrote:
>> 
>> All modern OSs constrain themselves to only ISV enabled MMIO 
>> instructions.
> 
> Why? Such does not appear to be required by the architecture, AFAICT?


FYI, we are in the process of clarifying this area of limitation in the architecture’s support for virtualization.

There is a note on the expectations for LDP and STP here:
https://developer.arm.com/documentation/102374/0103/Loads-and-stores---load-pair-and-store-pair?lang=en


Thanks,
Christoffer


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] KVM: arm64: Implement KVM_TRANSLATE ioctl for arm64
  2025-09-23  9:25     ` Marc Zyngier
@ 2025-09-25  5:21       ` Priscilla Lam
  0 siblings, 0 replies; 9+ messages in thread
From: Priscilla Lam @ 2025-09-25  5:21 UTC (permalink / raw)
  To: maz
  Cc: christoffer.dall, dwmw, graf, gurugubs, jgrall, joey.gouly,
	kvmarm, linux-arm-kernel, linux-kernel, oliver.upton, prl,
	suzuki.poulose, yuzenghui

On 9/23/25, 2:26 AM, "Marc Zyngier" <maz@kernel.org> wrote:
> A guest doing this is a sure indication that it is completely broken,
> and will fail on actual HW, because it clearly ignores small
> insignificant details such as *ordering*.
> 
> My other question still remains: why can't you perform this page table
> walk in userspace? It is actually much safer to do so because you can
> stop other vcpus while inspecting the PTs, and avoid a vcpu playing
> tricks behind your back -- something the in-kernel PTW doesn't try to
> avoid.

In our case, this comes from the Windows TPM driver using ldp32 to read
static adjacent CRB fields where ordering doesn't matter, but I agree 
that a guest relying on this kind of translation is problematic. We'll
drop this KVM_TRANSLATE implementation and pursue a different approach
instead.

Thanks,
Priscilla


^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2025-09-25  5:21 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-09-22 20:24 [PATCH] KVM: arm64: Implement KVM_TRANSLATE ioctl for arm64 Priscilla Lam
2025-09-22 23:27 ` Oliver Upton
2025-09-23  8:03 ` Marc Zyngier
2025-09-23  8:29   ` Priscilla Lam
2025-09-23  8:39     ` Alexander Graf
2025-09-23  9:02       ` David Woodhouse
2025-09-23 18:05         ` Christoffer Dall
2025-09-23  9:25     ` Marc Zyngier
2025-09-25  5:21       ` Priscilla Lam

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).