From: Marc Zyngier <maz@kernel.org>
To: kvmarm@lists.linux.dev, kvm@vger.kernel.org,
linux-arm-kernel@lists.infradead.org
Cc: James Morse <james.morse@arm.com>,
Suzuki K Poulose <suzuki.poulose@arm.com>,
Oliver Upton <oliver.upton@linux.dev>,
Zenghui Yu <yuzenghui@huawei.com>,
Joey Gouly <joey.gouly@arm.com>,
Alexandru Elisei <alexandru.elisei@arm.com>,
Christoffer Dall <christoffer.dall@arm.com>,
Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com>
Subject: [PATCH v3 02/16] KVM: arm64: nv: Implement nested Stage-2 page table walk logic
Date: Fri, 14 Jun 2024 15:45:38 +0100 [thread overview]
Message-ID: <20240614144552.2773592-3-maz@kernel.org> (raw)
In-Reply-To: <20240614144552.2773592-1-maz@kernel.org>
From: Christoffer Dall <christoffer.dall@linaro.org>
Based on the pseudo-code in the ARM ARM, implement a stage 2 software
page table walker.
Co-developed-by: Jintack Lim <jintack.lim@linaro.org>
Signed-off-by: Jintack Lim <jintack.lim@linaro.org>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
arch/arm64/include/asm/esr.h | 1 +
arch/arm64/include/asm/kvm_nested.h | 13 ++
arch/arm64/kvm/nested.c | 264 ++++++++++++++++++++++++++++
3 files changed, 278 insertions(+)
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index 7abf09df7033..15a4be765cad 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -152,6 +152,7 @@
#define ESR_ELx_Xs_MASK (GENMASK_ULL(4, 0))
/* ISS field definitions for exceptions taken in to Hyp */
+#define ESR_ELx_FSC_ADDRSZ (0x00)
#define ESR_ELx_CV (UL(1) << 24)
#define ESR_ELx_COND_SHIFT (20)
#define ESR_ELx_COND_MASK (UL(0xF) << ESR_ELx_COND_SHIFT)
diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
index a69faee31342..5404b7b843cf 100644
--- a/arch/arm64/include/asm/kvm_nested.h
+++ b/arch/arm64/include/asm/kvm_nested.h
@@ -68,6 +68,19 @@ extern struct kvm_s2_mmu *lookup_s2_mmu(struct kvm_vcpu *vcpu);
extern void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu);
extern void kvm_vcpu_put_hw_mmu(struct kvm_vcpu *vcpu);
+struct kvm_s2_trans {
+ phys_addr_t output;
+ unsigned long block_size;
+ bool writable;
+ bool readable;
+ int level;
+ u32 esr;
+ u64 upper_attr;
+};
+
+extern int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa,
+ struct kvm_s2_trans *result);
+
int kvm_init_nv_sysregs(struct kvm *kvm);
#ifdef CONFIG_ARM64_PTR_AUTH
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index 0a6b894b6390..2fcfaeaeef98 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -91,6 +91,270 @@ int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu)
return 0;
}
+struct s2_walk_info {
+ int (*read_desc)(phys_addr_t pa, u64 *desc, void *data);
+ void *data;
+ u64 baddr;
+ unsigned int max_oa_bits;
+ unsigned int pgshift;
+ unsigned int sl;
+ unsigned int t0sz;
+ bool be;
+};
+
+static unsigned int ps_to_output_size(unsigned int ps)
+{
+ switch (ps) {
+ case 0: return 32;
+ case 1: return 36;
+ case 2: return 40;
+ case 3: return 42;
+ case 4: return 44;
+ case 5:
+ default:
+ return 48;
+ }
+}
+
+static u32 compute_fsc(int level, u32 fsc)
+{
+ return fsc | (level & 0x3);
+}
+
+static int get_ia_size(struct s2_walk_info *wi)
+{
+ return 64 - wi->t0sz;
+}
+
+static int check_base_s2_limits(struct s2_walk_info *wi,
+ int level, int input_size, int stride)
+{
+ int start_size, ia_size;
+
+ ia_size = get_ia_size(wi);
+
+ /* Check translation limits */
+ switch (BIT(wi->pgshift)) {
+ case SZ_64K:
+ if (level == 0 || (level == 1 && ia_size <= 42))
+ return -EFAULT;
+ break;
+ case SZ_16K:
+ if (level == 0 || (level == 1 && ia_size <= 40))
+ return -EFAULT;
+ break;
+ case SZ_4K:
+ if (level < 0 || (level == 0 && ia_size <= 42))
+ return -EFAULT;
+ break;
+ }
+
+ /* Check input size limits */
+ if (input_size > ia_size)
+ return -EFAULT;
+
+ /* Check number of entries in starting level table */
+ start_size = input_size - ((3 - level) * stride + wi->pgshift);
+ if (start_size < 1 || start_size > stride + 4)
+ return -EFAULT;
+
+ return 0;
+}
+
+/* Check if output is within boundaries */
+static int check_output_size(struct s2_walk_info *wi, phys_addr_t output)
+{
+ unsigned int output_size = wi->max_oa_bits;
+
+ if (output_size != 48 && (output & GENMASK_ULL(47, output_size)))
+ return -1;
+
+ return 0;
+}
+
+/*
+ * This is essentially a C-version of the pseudo code from the ARM ARM
+ * AArch64.TranslationTableWalk function. I strongly recommend looking at
+ * that pseudocode in trying to understand this.
+ *
+ * Must be called with the kvm->srcu read lock held
+ */
+static int walk_nested_s2_pgd(phys_addr_t ipa,
+ struct s2_walk_info *wi, struct kvm_s2_trans *out)
+{
+ int first_block_level, level, stride, input_size, base_lower_bound;
+ phys_addr_t base_addr;
+ unsigned int addr_top, addr_bottom;
+ u64 desc; /* page table entry */
+ int ret;
+ phys_addr_t paddr;
+
+ switch (BIT(wi->pgshift)) {
+ default:
+ case SZ_64K:
+ case SZ_16K:
+ level = 3 - wi->sl;
+ first_block_level = 2;
+ break;
+ case SZ_4K:
+ level = 2 - wi->sl;
+ first_block_level = 1;
+ break;
+ }
+
+ stride = wi->pgshift - 3;
+ input_size = get_ia_size(wi);
+ if (input_size > 48 || input_size < 25)
+ return -EFAULT;
+
+ ret = check_base_s2_limits(wi, level, input_size, stride);
+ if (WARN_ON(ret))
+ return ret;
+
+ base_lower_bound = 3 + input_size - ((3 - level) * stride +
+ wi->pgshift);
+ base_addr = wi->baddr & GENMASK_ULL(47, base_lower_bound);
+
+ if (check_output_size(wi, base_addr)) {
+ out->esr = compute_fsc(level, ESR_ELx_FSC_ADDRSZ);
+ return 1;
+ }
+
+ addr_top = input_size - 1;
+
+ while (1) {
+ phys_addr_t index;
+
+ addr_bottom = (3 - level) * stride + wi->pgshift;
+ index = (ipa & GENMASK_ULL(addr_top, addr_bottom))
+ >> (addr_bottom - 3);
+
+ paddr = base_addr | index;
+ ret = wi->read_desc(paddr, &desc, wi->data);
+ if (ret < 0)
+ return ret;
+
+ /*
+ * Handle reversedescriptors if endianness differs between the
+ * host and the guest hypervisor.
+ */
+ if (wi->be)
+ desc = be64_to_cpu((__force __be64)desc);
+ else
+ desc = le64_to_cpu((__force __le64)desc);
+
+ /* Check for valid descriptor at this point */
+ if (!(desc & 1) || ((desc & 3) == 1 && level == 3)) {
+ out->esr = compute_fsc(level, ESR_ELx_FSC_FAULT);
+ out->upper_attr = desc;
+ return 1;
+ }
+
+ /* We're at the final level or block translation level */
+ if ((desc & 3) == 1 || level == 3)
+ break;
+
+ if (check_output_size(wi, desc)) {
+ out->esr = compute_fsc(level, ESR_ELx_FSC_ADDRSZ);
+ out->upper_attr = desc;
+ return 1;
+ }
+
+ base_addr = desc & GENMASK_ULL(47, wi->pgshift);
+
+ level += 1;
+ addr_top = addr_bottom - 1;
+ }
+
+ if (level < first_block_level) {
+ out->esr = compute_fsc(level, ESR_ELx_FSC_FAULT);
+ out->upper_attr = desc;
+ return 1;
+ }
+
+ /*
+ * We don't use the contiguous bit in the stage-2 ptes, so skip check
+ * for misprogramming of the contiguous bit.
+ */
+
+ if (check_output_size(wi, desc)) {
+ out->esr = compute_fsc(level, ESR_ELx_FSC_ADDRSZ);
+ out->upper_attr = desc;
+ return 1;
+ }
+
+ if (!(desc & BIT(10))) {
+ out->esr = compute_fsc(level, ESR_ELx_FSC_ACCESS);
+ out->upper_attr = desc;
+ return 1;
+ }
+
+ /* Calculate and return the result */
+ paddr = (desc & GENMASK_ULL(47, addr_bottom)) |
+ (ipa & GENMASK_ULL(addr_bottom - 1, 0));
+ out->output = paddr;
+ out->block_size = 1UL << ((3 - level) * stride + wi->pgshift);
+ out->readable = desc & (0b01 << 6);
+ out->writable = desc & (0b10 << 6);
+ out->level = level;
+ out->upper_attr = desc & GENMASK_ULL(63, 52);
+ return 0;
+}
+
+static int read_guest_s2_desc(phys_addr_t pa, u64 *desc, void *data)
+{
+ struct kvm_vcpu *vcpu = data;
+
+ return kvm_read_guest(vcpu->kvm, pa, desc, sizeof(*desc));
+}
+
+static void vtcr_to_walk_info(u64 vtcr, struct s2_walk_info *wi)
+{
+ wi->t0sz = vtcr & TCR_EL2_T0SZ_MASK;
+
+ switch (vtcr & VTCR_EL2_TG0_MASK) {
+ case VTCR_EL2_TG0_4K:
+ wi->pgshift = 12; break;
+ case VTCR_EL2_TG0_16K:
+ wi->pgshift = 14; break;
+ case VTCR_EL2_TG0_64K:
+ default: /* IMPDEF: treat any other value as 64k */
+ wi->pgshift = 16; break;
+ }
+
+ wi->sl = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
+ /* Global limit for now, should eventually be per-VM */
+ wi->max_oa_bits = min(get_kvm_ipa_limit(),
+ ps_to_output_size(FIELD_GET(VTCR_EL2_PS_MASK, vtcr)));
+}
+
+int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa,
+ struct kvm_s2_trans *result)
+{
+ u64 vtcr = vcpu_read_sys_reg(vcpu, VTCR_EL2);
+ struct s2_walk_info wi;
+ int ret;
+
+ result->esr = 0;
+
+ if (!vcpu_has_nv(vcpu))
+ return 0;
+
+ wi.read_desc = read_guest_s2_desc;
+ wi.data = vcpu;
+ wi.baddr = vcpu_read_sys_reg(vcpu, VTTBR_EL2);
+
+ vtcr_to_walk_info(vtcr, &wi);
+
+ wi.be = vcpu_read_sys_reg(vcpu, SCTLR_EL2) & SCTLR_ELx_EE;
+
+ ret = walk_nested_s2_pgd(gipa, &wi, result);
+ if (ret)
+ result->esr |= (kvm_vcpu_get_esr(vcpu) & ~ESR_ELx_FSC);
+
+ return ret;
+}
+
struct kvm_s2_mmu *lookup_s2_mmu(struct kvm_vcpu *vcpu)
{
struct kvm *kvm = vcpu->kvm;
--
2.39.2
next prev parent reply other threads:[~2024-06-14 14:46 UTC|newest]
Thread overview: 29+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-06-14 14:45 [PATCH v3 00/16] KVM: arm64: nv: Shadow stage-2 page table handling Marc Zyngier
2024-06-14 14:45 ` [PATCH v3 01/16] KVM: arm64: nv: Support multiple nested Stage-2 mmu structures Marc Zyngier
2024-06-14 14:45 ` Marc Zyngier [this message]
2026-02-04 8:28 ` [PATCH v3 02/16] KVM: arm64: nv: Implement nested Stage-2 page table walk logic Zenghui Yu
2026-02-06 11:05 ` Marc Zyngier
2026-02-08 18:34 ` Zenghui Yu
2024-06-14 14:45 ` [PATCH v3 03/16] KVM: arm64: nv: Handle shadow stage 2 page faults Marc Zyngier
2024-08-21 19:11 ` Zenghui Yu
2024-08-22 6:31 ` Oliver Upton
2024-06-14 14:45 ` [PATCH v3 04/16] KVM: arm64: nv: Unmap/flush shadow stage 2 page tables Marc Zyngier
2024-06-14 14:45 ` [PATCH v3 05/16] KVM: arm64: nv: Add Stage-1 EL2 invalidation primitives Marc Zyngier
2024-06-14 14:45 ` [PATCH v3 06/16] KVM: arm64: nv: Handle EL2 Stage-1 TLB invalidation Marc Zyngier
2024-06-14 14:45 ` [PATCH v3 07/16] KVM: arm64: nv: Handle TLB invalidation targeting L2 stage-1 Marc Zyngier
2024-06-14 14:45 ` [PATCH v3 08/16] KVM: arm64: nv: Handle TLBI VMALLS12E1{,IS} operations Marc Zyngier
2024-06-14 14:45 ` [PATCH v3 09/16] KVM: arm64: nv: Handle TLBI ALLE1{,IS} operations Marc Zyngier
2024-06-14 14:45 ` [PATCH v3 10/16] KVM: arm64: nv: Handle TLBI IPAS2E1{,IS} operations Marc Zyngier
2024-06-14 14:45 ` [PATCH v3 11/16] KVM: arm64: nv: Handle FEAT_TTL hinted TLB operations Marc Zyngier
2024-06-14 14:45 ` [PATCH v3 12/16] KVM: arm64: nv: Tag shadow S2 entries with guest's leaf S2 level Marc Zyngier
2024-06-14 14:45 ` [PATCH v3 13/16] KVM: arm64: nv: Invalidate TLBs based on shadow S2 TTL-like information Marc Zyngier
2024-06-14 14:45 ` [PATCH v3 14/16] KVM: arm64: nv: Add handling of outer-shareable TLBI operations Marc Zyngier
2024-06-14 14:45 ` [PATCH v3 15/16] KVM: arm64: nv: Add handling of range-based " Marc Zyngier
2024-06-14 14:45 ` [PATCH v3 16/16] KVM: arm64: nv: Add handling of NXS-flavoured " Marc Zyngier
2024-06-19 8:41 ` [PATCH v3 00/16] KVM: arm64: nv: Shadow stage-2 page table handling Oliver Upton
2024-11-21 8:11 ` Ganapatrao Kulkarni
2024-11-21 16:44 ` Marc Zyngier
2024-11-22 16:54 ` Ganapatrao Kulkarni
2024-11-22 19:04 ` Marc Zyngier
2024-11-23 9:49 ` Marc Zyngier
2024-12-05 11:50 ` Darren Hart
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240614144552.2773592-3-maz@kernel.org \
--to=maz@kernel.org \
--cc=alexandru.elisei@arm.com \
--cc=christoffer.dall@arm.com \
--cc=gankulkarni@os.amperecomputing.com \
--cc=james.morse@arm.com \
--cc=joey.gouly@arm.com \
--cc=kvm@vger.kernel.org \
--cc=kvmarm@lists.linux.dev \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=oliver.upton@linux.dev \
--cc=suzuki.poulose@arm.com \
--cc=yuzenghui@huawei.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.