* [PATCH 01/12] arm64: Detect FEAT_XNX
2025-11-12 18:33 [PATCH 00/12] KVM: arm64: nv: Implement FEAT_XNX and FEAT_HAF Oliver Upton
@ 2025-11-12 18:33 ` Oliver Upton
2025-11-12 18:33 ` [PATCH 02/12] KVM: arm64: Add support for FEAT_XNX stage-2 permissions Oliver Upton
` (11 subsequent siblings)
12 siblings, 0 replies; 19+ messages in thread
From: Oliver Upton @ 2025-11-12 18:33 UTC (permalink / raw)
To: kvmarm; +Cc: Marc Zyngier, Joey Gouly, Suzuki K Poulose, Zenghui Yu,
Oliver Upton
Detect the feature in anticipation of using it in KVM.
Signed-off-by: Oliver Upton <oupton@kernel.org>
---
arch/arm64/kernel/cpufeature.c | 7 +++++++
arch/arm64/tools/cpucaps | 1 +
2 files changed, 8 insertions(+)
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 5ed401ff79e3..aa3ecae252d3 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -3088,6 +3088,13 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.capability = ARM64_HAS_GICV5_LEGACY,
.matches = test_has_gicv5_legacy,
},
+ {
+ .desc = "XNX",
+ .capability = ARM64_HAS_XNX,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, XNX, IMP)
+ },
{},
};
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index 1b32c1232d28..ee74199107d3 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -64,6 +64,7 @@ HAS_TLB_RANGE
HAS_VA52
HAS_VIRT_HOST_EXTN
HAS_WFXT
+HAS_XNX
HAFT
HW_DBM
KVM_HVHE
--
2.47.3
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH 02/12] KVM: arm64: Add support for FEAT_XNX stage-2 permissions
2025-11-12 18:33 [PATCH 00/12] KVM: arm64: nv: Implement FEAT_XNX and FEAT_HAF Oliver Upton
2025-11-12 18:33 ` [PATCH 01/12] arm64: Detect FEAT_XNX Oliver Upton
@ 2025-11-12 18:33 ` Oliver Upton
2025-11-12 18:33 ` [PATCH 03/12] KVM: arm64: nv: Forward FEAT_XNX permissions to the shadow stage-2 Oliver Upton
` (10 subsequent siblings)
12 siblings, 0 replies; 19+ messages in thread
From: Oliver Upton @ 2025-11-12 18:33 UTC (permalink / raw)
To: kvmarm; +Cc: Marc Zyngier, Joey Gouly, Suzuki K Poulose, Zenghui Yu,
Oliver Upton
FEAT_XNX adds support for encoding separate execute permissions for EL0
and EL1 at stage-2. Add support for this to the page table library,
hiding the unintuitive encoding scheme behind generic pX and uX
permission flags.
Signed-off-by: Oliver Upton <oupton@kernel.org>
---
arch/arm64/include/asm/kvm_pgtable.h | 17 +++++----
arch/arm64/kvm/hyp/pgtable.c | 54 ++++++++++++++++++++++++----
2 files changed, 58 insertions(+), 13 deletions(-)
diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index 2888b5d03757..c72149a607d6 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -89,7 +89,7 @@ typedef u64 kvm_pte_t;
#define KVM_PTE_LEAF_ATTR_HI_S1_XN BIT(54)
-#define KVM_PTE_LEAF_ATTR_HI_S2_XN BIT(54)
+#define KVM_PTE_LEAF_ATTR_HI_S2_XN GENMASK(54, 53)
#define KVM_PTE_LEAF_ATTR_HI_S1_GP BIT(50)
@@ -251,12 +251,15 @@ enum kvm_pgtable_stage2_flags {
* @KVM_PGTABLE_PROT_SW3: Software bit 3.
*/
enum kvm_pgtable_prot {
- KVM_PGTABLE_PROT_X = BIT(0),
- KVM_PGTABLE_PROT_W = BIT(1),
- KVM_PGTABLE_PROT_R = BIT(2),
-
- KVM_PGTABLE_PROT_DEVICE = BIT(3),
- KVM_PGTABLE_PROT_NORMAL_NC = BIT(4),
+ KVM_PGTABLE_PROT_PX = BIT(0),
+ KVM_PGTABLE_PROT_UX = BIT(1),
+ KVM_PGTABLE_PROT_X = KVM_PGTABLE_PROT_PX |
+ KVM_PGTABLE_PROT_UX,
+ KVM_PGTABLE_PROT_W = BIT(2),
+ KVM_PGTABLE_PROT_R = BIT(3),
+
+ KVM_PGTABLE_PROT_DEVICE = BIT(4),
+ KVM_PGTABLE_PROT_NORMAL_NC = BIT(5),
KVM_PGTABLE_PROT_SW0 = BIT(55),
KVM_PGTABLE_PROT_SW1 = BIT(56),
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index c351b4abd5db..8c813bf70b38 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -661,11 +661,36 @@ void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
#define KVM_S2_MEMATTR(pgt, attr) PAGE_S2_MEMATTR(attr, stage2_has_fwb(pgt))
+static int stage2_set_xn_attr(enum kvm_pgtable_prot prot, kvm_pte_t *attr)
+{
+ bool px, ux;
+ u8 xn; /* value for the 2-bit stage-2 XN field */
+
+ px = prot & KVM_PGTABLE_PROT_PX;
+ ux = prot & KVM_PGTABLE_PROT_UX;
+
+ if (!cpus_have_final_cap(ARM64_HAS_XNX) && px != ux) /* differing pX/uX needs FEAT_XNX */
+ return -EINVAL;
+
+ if (px && ux)
+ xn = 0b00; /* pX and uX */
+ else if (!px && ux)
+ xn = 0b01; /* uX only */
+ else if (!px && !ux)
+ xn = 0b10; /* neither pX nor uX */
+ else
+ xn = 0b11; /* pX only */
+
+ *attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_HI_S2_XN, xn); /* ORs into *attr; existing bits are kept */
+ return 0;
+}
+
static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot prot,
kvm_pte_t *ptep)
{
kvm_pte_t attr;
u32 sh = KVM_PTE_LEAF_ATTR_LO_S2_SH_IS;
+ int r;
switch (prot & (KVM_PGTABLE_PROT_DEVICE |
KVM_PGTABLE_PROT_NORMAL_NC)) {
@@ -685,8 +710,9 @@ static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot p
attr = KVM_S2_MEMATTR(pgt, NORMAL);
}
- if (!(prot & KVM_PGTABLE_PROT_X))
- attr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;
+ r = stage2_set_xn_attr(prot, &attr);
+ if (r)
+ return r;
if (prot & KVM_PGTABLE_PROT_R)
attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;
@@ -715,8 +741,19 @@ enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte)
prot |= KVM_PGTABLE_PROT_R;
if (pte & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W)
prot |= KVM_PGTABLE_PROT_W;
- if (!(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN))
- prot |= KVM_PGTABLE_PROT_X;
+
+ switch (FIELD_GET(KVM_PTE_LEAF_ATTR_HI_S2_XN, pte)) {
+ case 0b00:
+ prot |= KVM_PGTABLE_PROT_PX | KVM_PGTABLE_PROT_UX;
+ break;
+ case 0b01:
+ prot |= KVM_PGTABLE_PROT_UX;
+ break;
+ case 0b11:
+ prot |= KVM_PGTABLE_PROT_PX;
+ break;
+ default:
+ }
return prot;
}
@@ -1293,6 +1330,7 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
int ret;
s8 level;
kvm_pte_t set = 0, clr = 0;
+ kvm_pte_t xn = 0; /* must start at 0: stage2_set_xn_attr() ORs into it */
if (prot & KVM_PTE_LEAF_ATTR_HI_SW)
return -EINVAL;
@@ -1303,8 +1341,12 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
if (prot & KVM_PGTABLE_PROT_W)
set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;
- if (prot & KVM_PGTABLE_PROT_X)
- clr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;
+ ret = stage2_set_xn_attr(prot, &xn);
+ if (ret)
+ return ret;
+
+ set |= xn & KVM_PTE_LEAF_ATTR_HI_S2_XN; /* XN bits that must be 1 */
+ clr |= ~xn & KVM_PTE_LEAF_ATTR_HI_S2_XN; /* XN bits that must be 0 */
ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level, flags);
if (!ret || ret == -EAGAIN)
--
2.47.3
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH 03/12] KVM: arm64: nv: Forward FEAT_XNX permissions to the shadow stage-2
2025-11-12 18:33 [PATCH 00/12] KVM: arm64: nv: Implement FEAT_XNX and FEAT_HAF Oliver Upton
2025-11-12 18:33 ` [PATCH 01/12] arm64: Detect FEAT_XNX Oliver Upton
2025-11-12 18:33 ` [PATCH 02/12] KVM: arm64: Add support for FEAT_XNX stage-2 permissions Oliver Upton
@ 2025-11-12 18:33 ` Oliver Upton
2025-11-12 18:33 ` [PATCH 04/12] KVM: arm64: Teach ptdump about FEAT_XNX permissions Oliver Upton
` (9 subsequent siblings)
12 siblings, 0 replies; 19+ messages in thread
From: Oliver Upton @ 2025-11-12 18:33 UTC (permalink / raw)
To: kvmarm; +Cc: Marc Zyngier, Joey Gouly, Suzuki K Poulose, Zenghui Yu,
Oliver Upton
Add support for FEAT_XNX to shadow stage-2 MMUs, being careful to only
evaluate XN[0] when the feature is actually exposed to the VM.
Restructure the layering of permissions in the fault handler to assume
pX and uX, then restrict them based on the guest's stage-2 afterwards.
Signed-off-by: Oliver Upton <oupton@kernel.org>
---
arch/arm64/include/asm/kvm_nested.h | 37 +++++++++++++++++++++++++++--
arch/arm64/kvm/mmu.c | 23 ++++++++++++++----
arch/arm64/kvm/nested.c | 5 +++-
3 files changed, 57 insertions(+), 8 deletions(-)
diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
index f7c06a840963..5d967b60414c 100644
--- a/arch/arm64/include/asm/kvm_nested.h
+++ b/arch/arm64/include/asm/kvm_nested.h
@@ -120,9 +120,42 @@ static inline bool kvm_s2_trans_writable(struct kvm_s2_trans *trans)
return trans->writable;
}
-static inline bool kvm_s2_trans_executable(struct kvm_s2_trans *trans)
+static inline bool kvm_has_xnx(struct kvm *kvm)
{
- return !(trans->desc & BIT(54));
+ return cpus_have_final_cap(ARM64_HAS_XNX) &&
+ kvm_has_feat(kvm, ID_AA64MMFR1_EL1, XNX, IMP);
+}
+
+static inline bool kvm_s2_trans_exec_el0(struct kvm *kvm, struct kvm_s2_trans *trans)
+{
+ u8 xn = FIELD_GET(KVM_PTE_LEAF_ATTR_HI_S2_XN, trans->desc); /* 2-bit XN field value */
+
+ if (!kvm_has_xnx(kvm))
+ xn &= 0b10; /* ignore XN[0] unless FEAT_XNX is exposed to the VM */
+
+ switch (xn) {
+ case 0b00: /* pX and uX */
+ case 0b01: /* uX only */
+ return true;
+ default:
+ return false;
+ }
+}
+
+static inline bool kvm_s2_trans_exec_el1(struct kvm *kvm, struct kvm_s2_trans *trans)
+{
+ u8 xn = FIELD_GET(KVM_PTE_LEAF_ATTR_HI_S2_XN, trans->desc); /* 2-bit XN field value */
+
+ if (!kvm_has_xnx(kvm))
+ xn &= 0b10; /* ignore XN[0] unless FEAT_XNX is exposed to the VM */
+
+ switch (xn) {
+ case 0b00: /* pX and uX */
+ case 0b11: /* pX only */
+ return true;
+ default:
+ return false;
+ }
}
extern int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa,
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 7cc964af8d30..96f1786c72fe 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1521,6 +1521,16 @@ static void adjust_nested_fault_perms(struct kvm_s2_trans *nested,
*prot |= kvm_encode_nested_level(nested);
}
+static void adjust_nested_exec_perms(struct kvm *kvm,
+ struct kvm_s2_trans *nested,
+ enum kvm_pgtable_prot *prot)
+{
+ if (!kvm_s2_trans_exec_el0(kvm, nested))
+ *prot &= ~KVM_PGTABLE_PROT_UX; /* drop unprivileged execute */
+ if (!kvm_s2_trans_exec_el1(kvm, nested))
+ *prot &= ~KVM_PGTABLE_PROT_PX; /* drop privileged execute */
+}
+
#define KVM_PGTABLE_WALK_MEMABORT_FLAGS (KVM_PGTABLE_WALK_HANDLE_FAULT | KVM_PGTABLE_WALK_SHARED)
static int gmem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
@@ -1572,11 +1582,12 @@ static int gmem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
if (writable)
prot |= KVM_PGTABLE_PROT_W;
- if (exec_fault ||
- (cpus_have_final_cap(ARM64_HAS_CACHE_DIC) &&
- (!nested || kvm_s2_trans_executable(nested))))
+ if (exec_fault || cpus_have_final_cap(ARM64_HAS_CACHE_DIC))
prot |= KVM_PGTABLE_PROT_X;
+ if (nested)
+ adjust_nested_exec_perms(kvm, nested, &prot);
+
kvm_fault_lock(kvm);
if (mmu_invalidate_retry(kvm, mmu_seq)) {
ret = -EAGAIN;
@@ -1851,11 +1862,13 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
prot |= KVM_PGTABLE_PROT_NORMAL_NC;
else
prot |= KVM_PGTABLE_PROT_DEVICE;
- } else if (cpus_have_final_cap(ARM64_HAS_CACHE_DIC) &&
- (!nested || kvm_s2_trans_executable(nested))) {
+ } else if (cpus_have_final_cap(ARM64_HAS_CACHE_DIC)) {
prot |= KVM_PGTABLE_PROT_X;
}
+ if (nested)
+ adjust_nested_exec_perms(kvm, nested, &prot);
+
/*
* Under the premise of getting a FSC_PERM fault, we just need to relax
* permissions only if vma_pagesize equals fault_granule. Otherwise,
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index f04cda40545b..92b2a69f0b89 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -788,7 +788,10 @@ int kvm_s2_handle_perm_fault(struct kvm_vcpu *vcpu, struct kvm_s2_trans *trans)
return 0;
if (kvm_vcpu_trap_is_iabt(vcpu)) {
- forward_fault = !kvm_s2_trans_executable(trans);
+ if (vcpu_mode_priv(vcpu))
+ forward_fault = !kvm_s2_trans_exec_el1(vcpu->kvm, trans);
+ else
+ forward_fault = !kvm_s2_trans_exec_el0(vcpu->kvm, trans);
} else {
bool write_fault = kvm_is_write_fault(vcpu);
--
2.47.3
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH 04/12] KVM: arm64: Teach ptdump about FEAT_XNX permissions
2025-11-12 18:33 [PATCH 00/12] KVM: arm64: nv: Implement FEAT_XNX and FEAT_HAF Oliver Upton
` (2 preceding siblings ...)
2025-11-12 18:33 ` [PATCH 03/12] KVM: arm64: nv: Forward FEAT_XNX permissions to the shadow stage-2 Oliver Upton
@ 2025-11-12 18:33 ` Oliver Upton
2025-11-12 18:33 ` [PATCH 05/12] KVM: arm64: nv: Advertise support for FEAT_XNX Oliver Upton
` (8 subsequent siblings)
12 siblings, 0 replies; 19+ messages in thread
From: Oliver Upton @ 2025-11-12 18:33 UTC (permalink / raw)
To: kvmarm; +Cc: Marc Zyngier, Joey Gouly, Suzuki K Poulose, Zenghui Yu,
Oliver Upton
Although KVM doesn't make direct use of the feature, guest hypervisors
can use FEAT_XNX which influences the permissions of the shadow stage-2.
Update ptdump to separately print the privileged and unprivileged
execute permissions.
Signed-off-by: Oliver Upton <oupton@kernel.org>
---
arch/arm64/kvm/ptdump.c | 35 +++++++++++++++++++++++++++--------
1 file changed, 27 insertions(+), 8 deletions(-)
diff --git a/arch/arm64/kvm/ptdump.c b/arch/arm64/kvm/ptdump.c
index dc5acfb00af9..bd722383d0e3 100644
--- a/arch/arm64/kvm/ptdump.c
+++ b/arch/arm64/kvm/ptdump.c
@@ -31,27 +31,46 @@ static const struct ptdump_prot_bits stage2_pte_bits[] = {
.val = PTE_VALID,
.set = " ",
.clear = "F",
- }, {
+ },
+ {
.mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R,
.val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R,
.set = "R",
.clear = " ",
- }, {
+ },
+ {
.mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W,
.val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W,
.set = "W",
.clear = " ",
- }, {
+ },
+ {
.mask = KVM_PTE_LEAF_ATTR_HI_S2_XN,
- .val = KVM_PTE_LEAF_ATTR_HI_S2_XN,
- .set = "NX",
- .clear = "x ",
- }, {
+ .val = FIELD_PREP(KVM_PTE_LEAF_ATTR_HI_S2_XN, 0b00),
+ .set = "px ux ",
+ },
+ {
+ .mask = KVM_PTE_LEAF_ATTR_HI_S2_XN,
+ .val = FIELD_PREP(KVM_PTE_LEAF_ATTR_HI_S2_XN, 0b01),
+ .set = "PXNux ",
+ },
+ {
+ .mask = KVM_PTE_LEAF_ATTR_HI_S2_XN,
+ .val = FIELD_PREP(KVM_PTE_LEAF_ATTR_HI_S2_XN, 0b10),
+ .set = "PXNUXN",
+ },
+ {
+ .mask = KVM_PTE_LEAF_ATTR_HI_S2_XN,
+ .val = FIELD_PREP(KVM_PTE_LEAF_ATTR_HI_S2_XN, 0b11),
+ .set = "px UXN",
+ },
+ {
.mask = KVM_PTE_LEAF_ATTR_LO_S2_AF,
.val = KVM_PTE_LEAF_ATTR_LO_S2_AF,
.set = "AF",
.clear = " ",
- }, {
+ },
+ {
.mask = PMD_TYPE_MASK,
.val = PMD_TYPE_SECT,
.set = "BLK",
--
2.47.3
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH 05/12] KVM: arm64: nv: Advertise support for FEAT_XNX
2025-11-12 18:33 [PATCH 00/12] KVM: arm64: nv: Implement FEAT_XNX and FEAT_HAF Oliver Upton
` (3 preceding siblings ...)
2025-11-12 18:33 ` [PATCH 04/12] KVM: arm64: Teach ptdump about FEAT_XNX permissions Oliver Upton
@ 2025-11-12 18:33 ` Oliver Upton
2025-11-12 18:34 ` [PATCH 06/12] KVM: arm64: Call helper for reading descriptors directly Oliver Upton
` (7 subsequent siblings)
12 siblings, 0 replies; 19+ messages in thread
From: Oliver Upton @ 2025-11-12 18:33 UTC (permalink / raw)
To: kvmarm; +Cc: Marc Zyngier, Joey Gouly, Suzuki K Poulose, Zenghui Yu,
Oliver Upton
Everything is in place to support FEAT_XNX; advertise support.
Signed-off-by: Oliver Upton <oupton@kernel.org>
---
arch/arm64/kvm/nested.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index 92b2a69f0b89..08839a320a45 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -1559,7 +1559,6 @@ u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val)
val &= ~(ID_AA64MMFR1_EL1_CMOW |
ID_AA64MMFR1_EL1_nTLBPA |
ID_AA64MMFR1_EL1_ETS |
- ID_AA64MMFR1_EL1_XNX |
ID_AA64MMFR1_EL1_HAFDBS);
/* FEAT_E2H0 implies no VHE */
if (test_bit(KVM_ARM_VCPU_HAS_EL2_E2H0, kvm->arch.vcpu_features))
--
2.47.3
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH 06/12] KVM: arm64: Call helper for reading descriptors directly
2025-11-12 18:33 [PATCH 00/12] KVM: arm64: nv: Implement FEAT_XNX and FEAT_HAF Oliver Upton
` (4 preceding siblings ...)
2025-11-12 18:33 ` [PATCH 05/12] KVM: arm64: nv: Advertise support for FEAT_XNX Oliver Upton
@ 2025-11-12 18:34 ` Oliver Upton
2025-11-12 18:34 ` [PATCH 07/12] KVM: arm64: Handle endianness in read helper for emulated PTW Oliver Upton
` (6 subsequent siblings)
12 siblings, 0 replies; 19+ messages in thread
From: Oliver Upton @ 2025-11-12 18:34 UTC (permalink / raw)
To: kvmarm; +Cc: Marc Zyngier, Joey Gouly, Suzuki K Poulose, Zenghui Yu,
Oliver Upton
Going through a function pointer doesn't serve much purpose when there's
only one implementation.
Signed-off-by: Oliver Upton <oupton@kernel.org>
---
arch/arm64/kvm/nested.c | 20 ++++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index 08839a320a45..cf4b24e04a1a 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -124,7 +124,6 @@ int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu)
}
struct s2_walk_info {
- int (*read_desc)(phys_addr_t pa, u64 *desc, void *data);
void *data;
u64 baddr;
unsigned int max_oa_bits;
@@ -199,6 +198,15 @@ static int check_output_size(struct s2_walk_info *wi, phys_addr_t output)
return 0;
}
+static int read_guest_s2_desc(phys_addr_t pa, u64 *desc, void *data)
+{
+ struct kvm_vcpu *vcpu = data;
+ u64 val; /* NOTE(review): unused in this patch; first used by the endianness handling in 07/12 */
+ int r; /* NOTE(review): unused in this patch; first used by the endianness handling in 07/12 */
+
+ return kvm_read_guest(vcpu->kvm, pa, desc, sizeof(*desc));
+}
+
/*
* This is essentially a C-version of the pseudo code from the ARM ARM
* AArch64.TranslationTableWalk function. I strongly recommend looking at
@@ -257,7 +265,7 @@ static int walk_nested_s2_pgd(phys_addr_t ipa,
>> (addr_bottom - 3);
paddr = base_addr | index;
- ret = wi->read_desc(paddr, &desc, wi->data);
+ ret = read_guest_s2_desc(paddr, &desc, wi->data);
if (ret < 0)
return ret;
@@ -325,13 +333,6 @@ static int walk_nested_s2_pgd(phys_addr_t ipa,
return 0;
}
-static int read_guest_s2_desc(phys_addr_t pa, u64 *desc, void *data)
-{
- struct kvm_vcpu *vcpu = data;
-
- return kvm_read_guest(vcpu->kvm, pa, desc, sizeof(*desc));
-}
-
static void vtcr_to_walk_info(u64 vtcr, struct s2_walk_info *wi)
{
wi->t0sz = vtcr & TCR_EL2_T0SZ_MASK;
@@ -364,7 +365,6 @@ int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa,
if (!vcpu_has_nv(vcpu))
return 0;
- wi.read_desc = read_guest_s2_desc;
wi.data = vcpu;
wi.baddr = vcpu_read_sys_reg(vcpu, VTTBR_EL2);
--
2.47.3
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH 07/12] KVM: arm64: Handle endianness in read helper for emulated PTW
2025-11-12 18:33 [PATCH 00/12] KVM: arm64: nv: Implement FEAT_XNX and FEAT_HAF Oliver Upton
` (5 preceding siblings ...)
2025-11-12 18:34 ` [PATCH 06/12] KVM: arm64: Call helper for reading descriptors directly Oliver Upton
@ 2025-11-12 18:34 ` Oliver Upton
2025-11-12 18:34 ` [PATCH 08/12] KVM: arm64: nv: Use pgtable definitions in stage-2 walk Oliver Upton
` (5 subsequent siblings)
12 siblings, 0 replies; 19+ messages in thread
From: Oliver Upton @ 2025-11-12 18:34 UTC (permalink / raw)
To: kvmarm; +Cc: Marc Zyngier, Joey Gouly, Suzuki K Poulose, Zenghui Yu,
Oliver Upton
Implementing FEAT_HAFDBS means adding another descriptor accessor that
needs to deal with the guest-configured endianness. Prepare by moving
the endianness handling into the read accessor and out of the main body
of the S1/S2 PTWs.
Signed-off-by: Oliver Upton <oupton@kernel.org>
---
arch/arm64/kvm/at.c | 25 +++++++++++++++++++------
arch/arm64/kvm/nested.c | 30 +++++++++++++++++-------------
2 files changed, 36 insertions(+), 19 deletions(-)
diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c
index be26d5aa668c..a295a37dd3b1 100644
--- a/arch/arm64/kvm/at.c
+++ b/arch/arm64/kvm/at.c
@@ -362,6 +362,24 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
return -EFAULT;
}
+static int kvm_read_s1_desc(struct kvm_vcpu *vcpu, u64 pa, u64 *desc,
+ struct s1_walk_info *wi)
+{
+ u64 val; /* descriptor as read from guest memory, before byte-order conversion */
+ int r;
+
+ r = kvm_read_guest(vcpu->kvm, pa, &val, sizeof(val));
+ if (r)
+ return r; /* *desc is left untouched on a failed read */
+
+ if (wi->be)
+ *desc = be64_to_cpu((__force __be64)val);
+ else
+ *desc = le64_to_cpu((__force __le64)val);
+
+ return 0;
+}
+
static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
struct s1_walk_result *wr, u64 va)
{
@@ -414,17 +432,12 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
return ret;
}
- ret = kvm_read_guest(vcpu->kvm, ipa, &desc, sizeof(desc));
+ ret = kvm_read_s1_desc(vcpu, ipa, &desc, wi);
if (ret) {
fail_s1_walk(wr, ESR_ELx_FSC_SEA_TTW(level), false);
return ret;
}
- if (wi->be)
- desc = be64_to_cpu((__force __be64)desc);
- else
- desc = le64_to_cpu((__force __le64)desc);
-
/* Invalid descriptor */
if (!(desc & BIT(0)))
goto transfault;
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index cf4b24e04a1a..10e68aab3d2a 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -198,13 +198,26 @@ static int check_output_size(struct s2_walk_info *wi, phys_addr_t output)
return 0;
}
-static int read_guest_s2_desc(phys_addr_t pa, u64 *desc, void *data)
+static int read_guest_s2_desc(phys_addr_t pa, u64 *desc, struct s2_walk_info *wi)
{
- struct kvm_vcpu *vcpu = data;
+ struct kvm_vcpu *vcpu = wi->data;
u64 val;
int r;
- return kvm_read_guest(vcpu->kvm, pa, desc, sizeof(*desc));
+ r = kvm_read_guest(vcpu->kvm, pa, &val, sizeof(val));
+ if (r)
+ return r;
+
+ /*
+ * Handle reversed descriptors if endianness differs between the
+ * host and the guest hypervisor.
+ */
+ if (wi->be)
+ *desc = be64_to_cpu((__force __be64)val);
+ else
+ *desc = le64_to_cpu((__force __le64)val);
+
+ return 0;
}
/*
@@ -265,19 +278,10 @@ static int walk_nested_s2_pgd(phys_addr_t ipa,
>> (addr_bottom - 3);
paddr = base_addr | index;
- ret = read_guest_s2_desc(paddr, &desc, wi->data);
+ ret = read_guest_s2_desc(paddr, &desc, wi);
if (ret < 0)
return ret;
- /*
- * Handle reversedescriptors if endianness differs between the
- * host and the guest hypervisor.
- */
- if (wi->be)
- desc = be64_to_cpu((__force __be64)desc);
- else
- desc = le64_to_cpu((__force __le64)desc);
-
/* Check for valid descriptor at this point */
if (!(desc & 1) || ((desc & 3) == 1 && level == 3)) {
out->esr = compute_fsc(level, ESR_ELx_FSC_FAULT);
--
2.47.3
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH 08/12] KVM: arm64: nv: Use pgtable definitions in stage-2 walk
2025-11-12 18:33 [PATCH 00/12] KVM: arm64: nv: Implement FEAT_XNX and FEAT_HAF Oliver Upton
` (6 preceding siblings ...)
2025-11-12 18:34 ` [PATCH 07/12] KVM: arm64: Handle endianness in read helper for emulated PTW Oliver Upton
@ 2025-11-12 18:34 ` Oliver Upton
2025-11-12 18:34 ` [PATCH 09/12] KVM: arm64: Add helper for swapping guest descriptor Oliver Upton
` (4 subsequent siblings)
12 siblings, 0 replies; 19+ messages in thread
From: Oliver Upton @ 2025-11-12 18:34 UTC (permalink / raw)
To: kvmarm; +Cc: Marc Zyngier, Joey Gouly, Suzuki K Poulose, Zenghui Yu,
Oliver Upton
Use the existing page table definitions instead of magic numbers for the
stage-2 table walk.
Signed-off-by: Oliver Upton <oupton@kernel.org>
---
arch/arm64/kvm/nested.c | 21 +++++++++++++++------
1 file changed, 15 insertions(+), 6 deletions(-)
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index 10e68aab3d2a..b2fb3d7c9c19 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -283,14 +283,23 @@ static int walk_nested_s2_pgd(phys_addr_t ipa,
return ret;
/* Check for valid descriptor at this point */
- if (!(desc & 1) || ((desc & 3) == 1 && level == 3)) {
+ if (!(desc & KVM_PTE_VALID)) {
out->esr = compute_fsc(level, ESR_ELx_FSC_FAULT);
out->desc = desc;
return 1;
}
- /* We're at the final level or block translation level */
- if ((desc & 3) == 1 || level == 3)
+ if (FIELD_GET(KVM_PTE_TYPE, desc) == KVM_PTE_TYPE_BLOCK) {
+ if (level < 3)
+ break;
+
+ out->esr = compute_fsc(level, ESR_ELx_FSC_FAULT);
+ out->desc = desc;
+ return 1;
+ }
+
+ /* We're at the final level */
+ if (level == 3)
break;
if (check_output_size(wi, desc)) {
@@ -317,7 +326,7 @@ static int walk_nested_s2_pgd(phys_addr_t ipa,
return 1;
}
- if (!(desc & BIT(10))) {
+ if (!(desc & KVM_PTE_LEAF_ATTR_LO_S2_AF)) {
out->esr = compute_fsc(level, ESR_ELx_FSC_ACCESS);
out->desc = desc;
return 1;
@@ -330,8 +339,8 @@ static int walk_nested_s2_pgd(phys_addr_t ipa,
(ipa & GENMASK_ULL(addr_bottom - 1, 0));
out->output = paddr;
out->block_size = 1UL << ((3 - level) * stride + wi->pgshift);
- out->readable = desc & (0b01 << 6);
- out->writable = desc & (0b10 << 6);
+ out->readable = desc & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;
+ out->writable = desc & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;
out->level = level;
out->desc = desc;
return 0;
--
2.47.3
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH 09/12] KVM: arm64: Add helper for swapping guest descriptor
2025-11-12 18:33 [PATCH 00/12] KVM: arm64: nv: Implement FEAT_XNX and FEAT_HAF Oliver Upton
` (7 preceding siblings ...)
2025-11-12 18:34 ` [PATCH 08/12] KVM: arm64: nv: Use pgtable definitions in stage-2 walk Oliver Upton
@ 2025-11-12 18:34 ` Oliver Upton
2025-11-17 14:14 ` Marc Zyngier
2025-11-12 18:34 ` [PATCH 10/12] KVM: arm64: Implement HW access flag management in stage-1 SW PTW Oliver Upton
` (3 subsequent siblings)
12 siblings, 1 reply; 19+ messages in thread
From: Oliver Upton @ 2025-11-12 18:34 UTC (permalink / raw)
To: kvmarm; +Cc: Marc Zyngier, Joey Gouly, Suzuki K Poulose, Zenghui Yu,
Oliver Upton
Implementing FEAT_HAFDBS in KVM's software PTWs requires the ability to
CAS a descriptor to update the in-memory value. Add an accessor to do
exactly that, coping with the fact that guest descriptors are in user
memory (duh).
While FEAT_LSE is required on any system that implements NV, KVM now uses
the stage-1 PTW for non-nested use cases meaning an LL/SC implementation
is necessary as well.
Signed-off-by: Oliver Upton <oupton@kernel.org>
---
arch/arm64/include/asm/kvm_nested.h | 2 +
arch/arm64/kvm/at.c | 86 +++++++++++++++++++++++++++++
2 files changed, 88 insertions(+)
diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
index 5d967b60414c..6dbc2908aed9 100644
--- a/arch/arm64/include/asm/kvm_nested.h
+++ b/arch/arm64/include/asm/kvm_nested.h
@@ -403,4 +403,6 @@ void kvm_handle_s1e2_tlbi(struct kvm_vcpu *vcpu, u32 inst, u64 val);
(FIX_VNCR - __c); \
})
+int __kvm_at_swap_desc(struct kvm *kvm, gpa_t ipa, u64 old, u64 new);
+
#endif /* __ARM64_KVM_NESTED_H */
diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c
index a295a37dd3b1..74f3be46fa66 100644
--- a/arch/arm64/kvm/at.c
+++ b/arch/arm64/kvm/at.c
@@ -1650,3 +1650,89 @@ int __kvm_find_s1_desc_level(struct kvm_vcpu *vcpu, u64 va, u64 ipa, int *level)
return ret;
}
}
+
+static int __lse_swap_desc(u64 __user *ptep, u64 old, u64 new)
+{
+ u64 tmp = old; /* keep the expected value; 'old' is an in/out asm operand */
+ int ret = 0;
+
+ uaccess_enable_privileged();
+
+ asm volatile(__LSE_PREAMBLE
+ "1: cas %[old], %[new], %[addr]\n"
+ "2:\n"
+ _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %w[ret])
+ : [old] "+r" (old), [addr] "+Q" (*ptep), [ret] "+r" (ret)
+ : [new] "r" (new)
+ : "memory");
+
+ uaccess_disable_privileged();
+
+ if (ret) /* presumably set by the exception fixup when the access faults */
+ return ret;
+ if (tmp != old) /* memory did not hold the expected value */
+ return -EAGAIN;
+
+ return ret;
+}
+
+static int __llsc_swap_desc(u64 __user *ptep, u64 old, u64 new)
+{
+ unsigned int loops = 128; /* upper bound on store-exclusive retries */
+ u64 tmp;
+ int ret; /* NOTE(review): no exception table entry below, so a fault cannot be reported through ret */
+
+ uaccess_enable_privileged();
+
+ asm volatile("prfm pstl1strm, %[addr]\n"
+ "1: ldxr %[tmp], %[addr]\n"
+ "sub %[tmp], %[tmp], %[old]\n"
+ "cbnz %[tmp], 3f\n"
+ "2: stlxr %w[ret], %[new], %[addr]\n"
+ "cbz %w[ret], 4f\n"
+ "sub %w[loops], %w[loops], #1\n"
+ "cbnz %w[loops], 1b\n"
+ "3: mov %w[ret], %w[eagain]\n"
+ "4:\n"
+ : [ret] "=r" (ret), [addr] "+Q" (*ptep), [tmp] "=&r" (tmp),
+ [loops] "+r" (loops)
+ : [old] "r" (old), [new] "r" (new), [eagain] "Ir" (-EAGAIN)
+ : "memory");
+
+ uaccess_disable_privileged();
+ return ret; /* 0 on a successful store, -EAGAIN on mismatch or retry exhaustion */
+}
+
+int __kvm_at_swap_desc(struct kvm *kvm, gpa_t ipa, u64 old, u64 new)
+{
+ struct kvm_memory_slot *slot;
+ unsigned long hva;
+ u64 __user *ptep;
+ bool writable;
+ int offset;
+ gfn_t gfn;
+ int r;
+
+ gfn = ipa >> PAGE_SHIFT;
+ offset = offset_in_page(ipa); /* byte offset of the descriptor within its page */
+ slot = gfn_to_memslot(kvm, gfn);
+ hva = gfn_to_hva_memslot_prot(slot, gfn, &writable);
+ if (kvm_is_error_hva(hva))
+ return -EINVAL;
+ if (!writable)
+ return -EPERM;
+
+ ptep = (u64 __user *)(hva + offset); /* add the byte offset before casting */
+ if (cpus_have_final_cap(ARM64_HAS_LSE_ATOMICS))
+ r = __lse_swap_desc(ptep, old, new);
+ else
+ r = __llsc_swap_desc(ptep, old, new);
+
+ if (r < 0)
+ return r;
+ if (r)
+ return -EAGAIN;
+
+ mark_page_dirty_in_slot(kvm, slot, gfn); /* the descriptor was updated */
+ return 0;
+}
--
2.47.3
^ permalink raw reply related [flat|nested] 19+ messages in thread* Re: [PATCH 09/12] KVM: arm64: Add helper for swapping guest descriptor
2025-11-12 18:34 ` [PATCH 09/12] KVM: arm64: Add helper for swapping guest descriptor Oliver Upton
@ 2025-11-17 14:14 ` Marc Zyngier
2025-11-17 18:13 ` Oliver Upton
0 siblings, 1 reply; 19+ messages in thread
From: Marc Zyngier @ 2025-11-17 14:14 UTC (permalink / raw)
To: Oliver Upton; +Cc: kvmarm, Joey Gouly, Suzuki K Poulose, Zenghui Yu
On Wed, 12 Nov 2025 18:34:03 +0000,
Oliver Upton <oupton@kernel.org> wrote:
>
> Implementing FEAT_HAFDBS in KVM's software PTWs requires the ability to
> CAS a descriptor to update the in-memory value. Add an accessor to do
> exactly that, coping with the fact that guest descriptors are in user
> memory (duh).
>
> While FEAT_LSE required on any system that implements NV, KVM now uses
> the stage-1 PTW for non-nested use cases meaning an LL/SC implementation
> is necessary as well.
>
> Signed-off-by: Oliver Upton <oupton@kernel.org>
> ---
> arch/arm64/include/asm/kvm_nested.h | 2 +
> arch/arm64/kvm/at.c | 86 +++++++++++++++++++++++++++++
> 2 files changed, 88 insertions(+)
>
> diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
> index 5d967b60414c..6dbc2908aed9 100644
> --- a/arch/arm64/include/asm/kvm_nested.h
> +++ b/arch/arm64/include/asm/kvm_nested.h
> @@ -403,4 +403,6 @@ void kvm_handle_s1e2_tlbi(struct kvm_vcpu *vcpu, u32 inst, u64 val);
> (FIX_VNCR - __c); \
> })
>
> +int __kvm_at_swap_desc(struct kvm *kvm, gpa_t ipa, u64 old, u64 new);
> +
> #endif /* __ARM64_KVM_NESTED_H */
> diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c
> index a295a37dd3b1..74f3be46fa66 100644
> --- a/arch/arm64/kvm/at.c
> +++ b/arch/arm64/kvm/at.c
> @@ -1650,3 +1650,89 @@ int __kvm_find_s1_desc_level(struct kvm_vcpu *vcpu, u64 va, u64 ipa, int *level)
> return ret;
> }
> }
> +
> +static int __lse_swap_desc(u64 __user *ptep, u64 old, u64 new)
> +{
> + u64 tmp = old;
> + int ret = 0;
> +
> + uaccess_enable_privileged();
> +
> + asm volatile(__LSE_PREAMBLE
> + "1: cas %[old], %[new], %[addr]\n"
> + "2:\n"
> + _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %w[ret])
> + : [old] "+r" (old), [addr] "+Q" (*ptep), [ret] "+r" (ret)
> + : [new] "r" (new)
> + : "memory");
> +
> + uaccess_disable_privileged();
> +
> + if (ret)
> + return ret;
> + if (tmp != old)
> + return -EAGAIN;
> +
> + return ret;
> +}
> +
> +static int __llsc_swap_desc(u64 __user *ptep, u64 old, u64 new)
> +{
> + unsigned int loops = 128;
> + u64 tmp;
> + int ret;
> +
> + uaccess_enable_privileged();
> +
> + asm volatile("prfm pstl1strm, %[addr]\n"
> + "1: ldxr %[tmp], %[addr]\n"
> + "sub %[tmp], %[tmp], %[old]\n"
> + "cbnz %[tmp], 3f\n"
> + "2: stlxr %w[ret], %[new], %[addr]\n"
> + "cbz %w[ret], 4f\n"
> + "sub %w[loops], %w[loops], #1\n"
> + "cbnz %w[loops], 1b\n"
> + "3: mov %w[ret], %w[eagain]\n"
> + "4:\n"
> + : [ret] "=r" (ret), [addr] "+Q" (*ptep), [tmp] "=&r" (tmp),
> + [loops] "+r" (loops)
> + : [old] "r" (old), [new] "r" (new), [eagain] "Ir" (-EAGAIN)
> + : "memory");
Why doesn't this need an exception table as well? I'd expect it to
fault just as much as the LSE version (and ret cannot report -EFAULT,
for example).
I'm also on the fence about the bounded loop. Yes, forward progress is
a problem, but it should only affect large systems which can readily
use the atomic instructions. I'd rather we get rid of it until proven
that we really need something like it.
Thanks,
M.
--
Without deviation from the norm, progress is not possible.
^ permalink raw reply [flat|nested] 19+ messages in thread* Re: [PATCH 09/12] KVM: arm64: Add helper for swapping guest descriptor
2025-11-17 14:14 ` Marc Zyngier
@ 2025-11-17 18:13 ` Oliver Upton
0 siblings, 0 replies; 19+ messages in thread
From: Oliver Upton @ 2025-11-17 18:13 UTC (permalink / raw)
To: Marc Zyngier; +Cc: kvmarm, Joey Gouly, Suzuki K Poulose, Zenghui Yu
On Mon, Nov 17, 2025 at 02:14:00PM +0000, Marc Zyngier wrote:
> On Wed, 12 Nov 2025 18:34:03 +0000,
> Oliver Upton <oupton@kernel.org> wrote:
> > +static int __llsc_swap_desc(u64 __user *ptep, u64 old, u64 new)
> > +{
> > + unsigned int loops = 128;
> > + u64 tmp;
> > + int ret;
> > +
> > + uaccess_enable_privileged();
> > +
> > + asm volatile("prfm pstl1strm, %[addr]\n"
> > + "1: ldxr %[tmp], %[addr]\n"
> > + "sub %[tmp], %[tmp], %[old]\n"
> > + "cbnz %[tmp], 3f\n"
> > + "2: stlxr %w[ret], %[new], %[addr]\n"
> > + "cbz %w[ret], 4f\n"
> > + "sub %w[loops], %w[loops], #1\n"
> > + "cbnz %w[loops], 1b\n"
> > + "3: mov %w[ret], %w[eagain]\n"
> > + "4:\n"
> > + : [ret] "=r" (ret), [addr] "+Q" (*ptep), [tmp] "=&r" (tmp),
> > + [loops] "+r" (loops)
> > + : [old] "r" (old), [new] "r" (new), [eagain] "Ir" (-EAGAIN)
> > + : "memory");
>
> Why doesn't this need an exception table as well? I'd expect it to
> fault just as much as the LSE version (and ret cannot report -EFAULT,
> for example).
I just threw in ${something} to satisfy the LL/SC case, so yes, this
needs exception fixup as well.
> I'm also on the fence about the bounded loop. Yes, forward progress is
> a problem, but it should only affect large systems which can readily
> use the atomic instructions. I'd rather we get rid of it until proven
> that we really need something like it.
From a letter of the architecture POV I'm not sure we even need the
LL/SC implementation. As of now the only non-nested use case is the SEA
TTW walker which doesn't set the access flag (nor should it) and any
implementation that has FEAT_NV must also have FEAT_LSE.
I guess that just leaves the recursive NV case where a VMM explicitly
de-features FEAT_LSE. So I guess having a trivial, non-looping
implementation would be enough to cover this pointless issue.
Thanks,
Oliver
^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH 10/12] KVM: arm64: Implement HW access flag management in stage-1 SW PTW
2025-11-12 18:33 [PATCH 00/12] KVM: arm64: nv: Implement FEAT_XNX and FEAT_HAF Oliver Upton
` (8 preceding siblings ...)
2025-11-12 18:34 ` [PATCH 09/12] KVM: arm64: Add helper for swapping guest descriptor Oliver Upton
@ 2025-11-12 18:34 ` Oliver Upton
2025-11-17 14:49 ` Marc Zyngier
2025-11-12 18:34 ` [PATCH 11/12] KVM: arm64: nv: Implement HW access flag management in stage-2 " Oliver Upton
` (2 subsequent siblings)
12 siblings, 1 reply; 19+ messages in thread
From: Oliver Upton @ 2025-11-12 18:34 UTC (permalink / raw)
To: kvmarm; +Cc: Marc Zyngier, Joey Gouly, Suzuki K Poulose, Zenghui Yu,
Oliver Upton
Atomically update the Access flag at stage-1 when the guest has
configured the MMU to do so. Make the implementation choice (and liberal
interpretation of speculation) that any access type updates the Access
flag, including AT and CMO instructions.
Restart the entire walk by returning to the exception-generating
instruction in the case of a failed Access flag update.
Signed-off-by: Oliver Upton <oupton@kernel.org>
---
arch/arm64/include/asm/kvm_asm.h | 6 +--
arch/arm64/include/asm/kvm_nested.h | 1 +
arch/arm64/kvm/at.c | 74 +++++++++++++++++++++++------
arch/arm64/kvm/sys_regs.c | 9 ++--
4 files changed, 69 insertions(+), 21 deletions(-)
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 9da54d4ee49e..090f7b740bdc 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -246,9 +246,9 @@ extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu);
extern int __kvm_tlbi_s1e2(struct kvm_s2_mmu *mmu, u64 va, u64 sys_encoding);
extern void __kvm_timer_set_cntvoff(u64 cntvoff);
-extern void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
-extern void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
-extern void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
+extern int __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
+extern int __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
+extern int __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
index 6dbc2908aed9..905c658057a4 100644
--- a/arch/arm64/include/asm/kvm_nested.h
+++ b/arch/arm64/include/asm/kvm_nested.h
@@ -353,6 +353,7 @@ struct s1_walk_info {
bool be;
bool s2;
bool pa52bit;
+ bool ha;
};
struct s1_walk_result {
diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c
index 74f3be46fa66..9778a4241c19 100644
--- a/arch/arm64/kvm/at.c
+++ b/arch/arm64/kvm/at.c
@@ -346,6 +346,8 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
wi->baddr &= GENMASK_ULL(wi->max_oa_bits - 1, x);
+ wi->ha = tcr & TCR_HA;
+
return 0;
addrsz:
@@ -380,10 +382,24 @@ static int kvm_read_s1_desc(struct kvm_vcpu *vcpu, u64 pa, u64 *desc,
return 0;
}
+static int kvm_swap_s1_desc(struct kvm_vcpu *vcpu, u64 pa, u64 old, u64 new,
+ struct s1_walk_info *wi)
+{
+ if (wi->be) {
+ old = cpu_to_be64(old);
+ new = cpu_to_be64(new);
+ } else {
+ old = cpu_to_le64(old);
+ new = cpu_to_le64(new);
+ }
+
+ return __kvm_at_swap_desc(vcpu->kvm, pa, old, new);
+}
+
static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
struct s1_walk_result *wr, u64 va)
{
- u64 va_top, va_bottom, baddr, desc;
+ u64 va_top, va_bottom, baddr, desc, new_desc, ipa;
int level, stride, ret;
level = wi->sl;
@@ -393,7 +409,7 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
va_top = get_ia_size(wi) - 1;
while (1) {
- u64 index, ipa;
+ u64 index;
va_bottom = (3 - level) * stride + wi->pgshift;
index = (va & GENMASK_ULL(va_top, va_bottom)) >> (va_bottom - 3);
@@ -490,6 +506,17 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
if (check_output_size(baddr & GENMASK(52, va_bottom), wi))
goto addrsz;
+ if (wi->ha)
+ new_desc |= PTE_AF;
+
+ if (new_desc != desc) {
+ ret = kvm_swap_s1_desc(vcpu, ipa, desc, new_desc, wi);
+ if (ret)
+ return ret;
+
+ desc = new_desc;
+ }
+
if (!(desc & PTE_AF)) {
fail_s1_walk(wr, ESR_ELx_FSC_ACCESS_L(level), false);
return -EACCES;
@@ -1234,7 +1261,7 @@ static void compute_s1_permissions(struct kvm_vcpu *vcpu,
wr->pr &= !pan;
}
-static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
+static int handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr, u64 *par)
{
struct s1_walk_result wr = {};
struct s1_walk_info wi = {};
@@ -1259,6 +1286,11 @@ static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
srcu_read_unlock(&vcpu->kvm->srcu, idx);
+ /*
+ * Race to update a descriptor -- restart the walk.
+ */
+ if (ret == -EAGAIN)
+ return ret;
if (ret)
goto compute_par;
@@ -1292,7 +1324,8 @@ static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
fail_s1_walk(&wr, ESR_ELx_FSC_PERM_L(wr.level), false);
compute_par:
- return compute_par_s1(vcpu, &wi, &wr);
+ *par = compute_par_s1(vcpu, &wi, &wr);
+ return 0;
}
/*
@@ -1420,9 +1453,10 @@ static bool par_check_s1_access_fault(u64 par)
!(par & SYS_PAR_EL1_S));
}
-void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
+int __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
u64 par = __kvm_at_s1e01_fast(vcpu, op, vaddr);
+ int ret;
/*
* If PAR_EL1 reports that AT failed on a S1 permission or access
@@ -1434,15 +1468,20 @@ void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
*/
if ((par & SYS_PAR_EL1_F) &&
!par_check_s1_perm_fault(par) &&
- !par_check_s1_access_fault(par))
- par = handle_at_slow(vcpu, op, vaddr);
+ !par_check_s1_access_fault(par)) {
+ ret = handle_at_slow(vcpu, op, vaddr, &par);
+ if (ret)
+ return ret;
+ }
vcpu_write_sys_reg(vcpu, par, PAR_EL1);
+ return 0;
}
-void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
+int __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
u64 par;
+ int ret;
/*
* We've trapped, so everything is live on the CPU. As we will be
@@ -1489,13 +1528,17 @@ void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
}
/* We failed the translation, let's replay it in slow motion */
- if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par))
- par = handle_at_slow(vcpu, op, vaddr);
+ if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par)) {
+ ret = handle_at_slow(vcpu, op, vaddr, &par);
+ if (ret)
+ return ret;
+ }
vcpu_write_sys_reg(vcpu, par, PAR_EL1);
+ return 0;
}
-void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
+int __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
struct kvm_s2_trans out = {};
u64 ipa, par;
@@ -1522,13 +1565,13 @@ void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
break;
default:
WARN_ON_ONCE(1);
- return;
+ return 0;
}
__kvm_at_s1e01(vcpu, op, vaddr);
par = vcpu_read_sys_reg(vcpu, PAR_EL1);
if (par & SYS_PAR_EL1_F)
- return;
+ return 0;
/*
* If we only have a single stage of translation (EL2&0), exit
@@ -1536,14 +1579,14 @@ void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
*/
if (compute_translation_regime(vcpu, op) == TR_EL20 ||
!(vcpu_read_sys_reg(vcpu, HCR_EL2) & (HCR_VM | HCR_DC)))
- return;
+ return 0;
/* Do the stage-2 translation */
ipa = (par & GENMASK_ULL(47, 12)) | (vaddr & GENMASK_ULL(11, 0));
out.esr = 0;
ret = kvm_walk_nested_s2(vcpu, ipa, &out);
if (ret < 0)
- return;
+ return ret;
/* Check the access permission */
if (!out.esr &&
@@ -1552,6 +1595,7 @@ void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
par = compute_par_s12(vcpu, par, &out);
vcpu_write_sys_reg(vcpu, par, PAR_EL1);
+ return 0;
}
/*
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index e67eb39ddc11..61830eb3607c 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -3767,7 +3767,8 @@ static bool handle_at_s1e01(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
{
u32 op = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2);
- __kvm_at_s1e01(vcpu, op, p->regval);
+ if (__kvm_at_s1e01(vcpu, op, p->regval))
+ return false;
return true;
}
@@ -3784,7 +3785,8 @@ static bool handle_at_s1e2(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
return false;
}
- __kvm_at_s1e2(vcpu, op, p->regval);
+ if (__kvm_at_s1e2(vcpu, op, p->regval))
+ return false;
return true;
}
@@ -3794,7 +3796,8 @@ static bool handle_at_s12(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
{
u32 op = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2);
- __kvm_at_s12(vcpu, op, p->regval);
+ if (__kvm_at_s12(vcpu, op, p->regval))
+ return false;
return true;
}
--
2.47.3
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [PATCH 10/12] KVM: arm64: Implement HW access flag management in stage-1 SW PTW
2025-11-12 18:34 ` [PATCH 10/12] KVM: arm64: Implement HW access flag management in stage-1 SW PTW Oliver Upton
@ 2025-11-17 14:49 ` Marc Zyngier
2025-11-17 17:53 ` Oliver Upton
0 siblings, 1 reply; 19+ messages in thread
From: Marc Zyngier @ 2025-11-17 14:49 UTC (permalink / raw)
To: Oliver Upton; +Cc: kvmarm, Joey Gouly, Suzuki K Poulose, Zenghui Yu
On Wed, 12 Nov 2025 18:34:04 +0000,
Oliver Upton <oupton@kernel.org> wrote:
>
> Atomically update the Access flag at stage-1 when the guest has
> configured the MMU to do so. Make the implementation choice (and liberal
> interpretation of speculation) that any access type updates the Access
> flag, including AT and CMO instructions.
>
> Restart the entire walk by returning to the exception-generating
> instruction in the case of a failed Access flag update.
>
> Signed-off-by: Oliver Upton <oupton@kernel.org>
> ---
> arch/arm64/include/asm/kvm_asm.h | 6 +--
> arch/arm64/include/asm/kvm_nested.h | 1 +
> arch/arm64/kvm/at.c | 74 +++++++++++++++++++++++------
> arch/arm64/kvm/sys_regs.c | 9 ++--
> 4 files changed, 69 insertions(+), 21 deletions(-)
>
> diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
> index 9da54d4ee49e..090f7b740bdc 100644
> --- a/arch/arm64/include/asm/kvm_asm.h
> +++ b/arch/arm64/include/asm/kvm_asm.h
> @@ -246,9 +246,9 @@ extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu);
> extern int __kvm_tlbi_s1e2(struct kvm_s2_mmu *mmu, u64 va, u64 sys_encoding);
>
> extern void __kvm_timer_set_cntvoff(u64 cntvoff);
> -extern void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
> -extern void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
> -extern void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
> +extern int __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
> +extern int __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
> +extern int __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
>
> extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
>
> diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
> index 6dbc2908aed9..905c658057a4 100644
> --- a/arch/arm64/include/asm/kvm_nested.h
> +++ b/arch/arm64/include/asm/kvm_nested.h
> @@ -353,6 +353,7 @@ struct s1_walk_info {
> bool be;
> bool s2;
> bool pa52bit;
> + bool ha;
> };
>
> struct s1_walk_result {
> diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c
> index 74f3be46fa66..9778a4241c19 100644
> --- a/arch/arm64/kvm/at.c
> +++ b/arch/arm64/kvm/at.c
> @@ -346,6 +346,8 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
>
> wi->baddr &= GENMASK_ULL(wi->max_oa_bits - 1, x);
>
> + wi->ha = tcr & TCR_HA;
> +
> return 0;
>
> addrsz:
> @@ -380,10 +382,24 @@ static int kvm_read_s1_desc(struct kvm_vcpu *vcpu, u64 pa, u64 *desc,
> return 0;
> }
>
> +static int kvm_swap_s1_desc(struct kvm_vcpu *vcpu, u64 pa, u64 old, u64 new,
> + struct s1_walk_info *wi)
> +{
> + if (wi->be) {
> + old = cpu_to_be64(old);
> + new = cpu_to_be64(new);
> + } else {
> + old = cpu_to_le64(old);
> + new = cpu_to_le64(new);
> + }
> +
> + return __kvm_at_swap_desc(vcpu->kvm, pa, old, new);
> +}
> +
> static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
> struct s1_walk_result *wr, u64 va)
> {
> - u64 va_top, va_bottom, baddr, desc;
> + u64 va_top, va_bottom, baddr, desc, new_desc, ipa;
> int level, stride, ret;
>
> level = wi->sl;
> @@ -393,7 +409,7 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
> va_top = get_ia_size(wi) - 1;
>
> while (1) {
> - u64 index, ipa;
> + u64 index;
>
> va_bottom = (3 - level) * stride + wi->pgshift;
> index = (va & GENMASK_ULL(va_top, va_bottom)) >> (va_bottom - 3);
> @@ -490,6 +506,17 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
> if (check_output_size(baddr & GENMASK(52, va_bottom), wi))
> goto addrsz;
>
> + if (wi->ha)
> + new_desc |= PTE_AF;
What initialised new_desc in the first place? Shouldn't there be a
'new_desc = desc;' somewhere before that?
> +
> + if (new_desc != desc) {
> + ret = kvm_swap_s1_desc(vcpu, ipa, desc, new_desc, wi);
> + if (ret)
> + return ret;
> +
> + desc = new_desc;
> + }
> +
> if (!(desc & PTE_AF)) {
> fail_s1_walk(wr, ESR_ELx_FSC_ACCESS_L(level), false);
> return -EACCES;
> @@ -1234,7 +1261,7 @@ static void compute_s1_permissions(struct kvm_vcpu *vcpu,
> wr->pr &= !pan;
> }
>
> -static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
> +static int handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr, u64 *par)
> {
> struct s1_walk_result wr = {};
> struct s1_walk_info wi = {};
> @@ -1259,6 +1286,11 @@ static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
>
> srcu_read_unlock(&vcpu->kvm->srcu, idx);
>
> + /*
> + * Race to update a descriptor -- restart the walk.
> + */
> + if (ret == -EAGAIN)
> + return ret;
> if (ret)
> goto compute_par;
>
> @@ -1292,7 +1324,8 @@ static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
> fail_s1_walk(&wr, ESR_ELx_FSC_PERM_L(wr.level), false);
>
> compute_par:
> - return compute_par_s1(vcpu, &wi, &wr);
> + *par = compute_par_s1(vcpu, &wi, &wr);
> + return 0;
> }
>
> /*
> @@ -1420,9 +1453,10 @@ static bool par_check_s1_access_fault(u64 par)
> !(par & SYS_PAR_EL1_S));
> }
>
> -void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
> +int __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
> {
> u64 par = __kvm_at_s1e01_fast(vcpu, op, vaddr);
> + int ret;
>
> /*
> * If PAR_EL1 reports that AT failed on a S1 permission or access
> @@ -1434,15 +1468,20 @@ void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
> */
> if ((par & SYS_PAR_EL1_F) &&
> !par_check_s1_perm_fault(par) &&
> - !par_check_s1_access_fault(par))
> - par = handle_at_slow(vcpu, op, vaddr);
> + !par_check_s1_access_fault(par)) {
> + ret = handle_at_slow(vcpu, op, vaddr, &par);
> + if (ret)
> + return ret;
> + }
>
> vcpu_write_sys_reg(vcpu, par, PAR_EL1);
> + return 0;
> }
>
> -void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
> +int __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
> {
> u64 par;
> + int ret;
>
> /*
> * We've trapped, so everything is live on the CPU. As we will be
> @@ -1489,13 +1528,17 @@ void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
> }
>
> /* We failed the translation, let's replay it in slow motion */
> - if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par))
> - par = handle_at_slow(vcpu, op, vaddr);
> + if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par)) {
> + ret = handle_at_slow(vcpu, op, vaddr, &par);
> + if (ret)
> + return ret;
> + }
>
> vcpu_write_sys_reg(vcpu, par, PAR_EL1);
> + return 0;
> }
There is quite a bit of churn in this patch changing the signature
of the __kvm_at_s*() functions (and whatever calls them to propagate
the errors). It'd be worth pulling this refactor as a preliminary
patch, and then focus on the functional change.
Thanks,
M.
--
Without deviation from the norm, progress is not possible.
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 10/12] KVM: arm64: Implement HW access flag management in stage-1 SW PTW
2025-11-17 14:49 ` Marc Zyngier
@ 2025-11-17 17:53 ` Oliver Upton
0 siblings, 0 replies; 19+ messages in thread
From: Oliver Upton @ 2025-11-17 17:53 UTC (permalink / raw)
To: Marc Zyngier; +Cc: kvmarm, Joey Gouly, Suzuki K Poulose, Zenghui Yu
Hey,
On Mon, Nov 17, 2025 at 02:49:49PM +0000, Marc Zyngier wrote:
> On Wed, 12 Nov 2025 18:34:04 +0000,
> > @@ -490,6 +506,17 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
> > if (check_output_size(baddr & GENMASK(52, va_bottom), wi))
> > goto addrsz;
> >
> > + if (wi->ha)
> > + new_desc |= PTE_AF;
>
> What initialised new_desc in the first place? Shouldn't there be a
> 'new_desc = desc;' somewhere before that?
Lol, I had this right after the read succeeds but threw out that part of
the diff.
> > @@ -1489,13 +1528,17 @@ void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
> > }
> >
> > /* We failed the translation, let's replay it in slow motion */
> > - if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par))
> > - par = handle_at_slow(vcpu, op, vaddr);
> > + if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par)) {
> > + ret = handle_at_slow(vcpu, op, vaddr, &par);
> > + if (ret)
> > + return ret;
> > + }
> >
> > vcpu_write_sys_reg(vcpu, par, PAR_EL1);
> > + return 0;
> > }
>
> There is quite a bit of churn in this patch changing the signature
> of the __kvm_at_s*() functions (and whatever calls them to propagate
> the errors). It'd be worth pulling this refactor as a preliminary
> patch, and then focus on the functional change.
Fine by me, thanks for reviewing!
--
Thanks,
Oliver
^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH 11/12] KVM: arm64: nv: Implement HW access flag management in stage-2 SW PTW
2025-11-12 18:33 [PATCH 00/12] KVM: arm64: nv: Implement FEAT_XNX and FEAT_HAF Oliver Upton
` (9 preceding siblings ...)
2025-11-12 18:34 ` [PATCH 10/12] KVM: arm64: Implement HW access flag management in stage-1 SW PTW Oliver Upton
@ 2025-11-12 18:34 ` Oliver Upton
2025-11-17 14:51 ` Marc Zyngier
2025-11-12 18:34 ` [PATCH 12/12] KVM: arm64: nv: Expose hardware access flag management to NV guests Oliver Upton
2025-11-17 15:21 ` [PATCH 00/12] KVM: arm64: nv: Implement FEAT_XNX and FEAT_HAF Marc Zyngier
12 siblings, 1 reply; 19+ messages in thread
From: Oliver Upton @ 2025-11-12 18:34 UTC (permalink / raw)
To: kvmarm; +Cc: Marc Zyngier, Joey Gouly, Suzuki K Poulose, Zenghui Yu,
Oliver Upton
Give the stage-2 walk similar treatment to stage-1: update the access
flag during the table walk and do so for any walk context.
Signed-off-by: Oliver Upton <oupton@kernel.org>
---
arch/arm64/kvm/mmu.c | 3 +++
arch/arm64/kvm/nested.c | 46 ++++++++++++++++++++++++++++++++++-------
2 files changed, 41 insertions(+), 8 deletions(-)
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 96f1786c72fe..b23d3dc8865e 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -2012,6 +2012,9 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
u32 esr;
ret = kvm_walk_nested_s2(vcpu, fault_ipa, &nested_trans);
+ if (ret == -EAGAIN)
+ return 1;
+
if (ret) {
esr = kvm_s2_trans_esr(&nested_trans);
kvm_inject_s2_fault(vcpu, esr);
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index b2fb3d7c9c19..7293769dacf5 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -124,13 +124,14 @@ int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu)
}
struct s2_walk_info {
- void *data;
- u64 baddr;
- unsigned int max_oa_bits;
- unsigned int pgshift;
- unsigned int sl;
- unsigned int t0sz;
- bool be;
+ void *data;
+ u64 baddr;
+ unsigned int max_oa_bits;
+ unsigned int pgshift;
+ unsigned int sl;
+ unsigned int t0sz;
+ bool be;
+ bool ha;
};
static u32 compute_fsc(int level, u32 fsc)
@@ -220,6 +221,22 @@ static int read_guest_s2_desc(phys_addr_t pa, u64 *desc, struct s2_walk_info *wi
return 0;
}
+static int swap_guest_s2_desc(phys_addr_t pa, u64 old, u64 new,
+ struct s2_walk_info *wi)
+{
+ struct kvm_vcpu *vcpu = wi->data;
+
+ if (wi->be) {
+ old = cpu_to_be64(old);
+ new = cpu_to_be64(new);
+ } else {
+ old = cpu_to_le64(old);
+ new = cpu_to_le64(new);
+ }
+
+ return __kvm_at_swap_desc(vcpu->kvm, pa, old, new);
+}
+
/*
* This is essentially a C-version of the pseudo code from the ARM ARM
* AArch64.TranslationTableWalk function. I strongly recommend looking at
@@ -233,7 +250,7 @@ static int walk_nested_s2_pgd(phys_addr_t ipa,
int first_block_level, level, stride, input_size, base_lower_bound;
phys_addr_t base_addr;
unsigned int addr_top, addr_bottom;
- u64 desc; /* page table entry */
+ u64 desc, new_desc; /* page table entry */
int ret;
phys_addr_t paddr;
@@ -326,6 +343,17 @@ static int walk_nested_s2_pgd(phys_addr_t ipa,
return 1;
}
+ if (wi->ha)
+ new_desc |= KVM_PTE_LEAF_ATTR_LO_S2_AF;
+
+ if (new_desc != desc) {
+ ret = swap_guest_s2_desc(paddr, desc, new_desc, wi);
+ if (ret)
+ return ret;
+
+ desc = new_desc;
+ }
+
if (!(desc & KVM_PTE_LEAF_ATTR_LO_S2_AF)) {
out->esr = compute_fsc(level, ESR_ELx_FSC_ACCESS);
out->desc = desc;
@@ -364,6 +392,8 @@ static void vtcr_to_walk_info(u64 vtcr, struct s2_walk_info *wi)
/* Global limit for now, should eventually be per-VM */
wi->max_oa_bits = min(get_kvm_ipa_limit(),
ps_to_output_size(FIELD_GET(VTCR_EL2_PS_MASK, vtcr), false));
+
+ wi->ha = vtcr & VTCR_EL2_HA;
}
int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa,
--
2.47.3
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [PATCH 11/12] KVM: arm64: nv: Implement HW access flag management in stage-2 SW PTW
2025-11-12 18:34 ` [PATCH 11/12] KVM: arm64: nv: Implement HW access flag management in stage-2 " Oliver Upton
@ 2025-11-17 14:51 ` Marc Zyngier
0 siblings, 0 replies; 19+ messages in thread
From: Marc Zyngier @ 2025-11-17 14:51 UTC (permalink / raw)
To: Oliver Upton; +Cc: kvmarm, Joey Gouly, Suzuki K Poulose, Zenghui Yu
On Wed, 12 Nov 2025 18:34:05 +0000,
Oliver Upton <oupton@kernel.org> wrote:
>
> Give the stage-2 walk similar treatment to stage-1: update the access
> flag during the table walk and do so for any walk context.
>
> Signed-off-by: Oliver Upton <oupton@kernel.org>
> ---
> arch/arm64/kvm/mmu.c | 3 +++
> arch/arm64/kvm/nested.c | 46 ++++++++++++++++++++++++++++++++++-------
> 2 files changed, 41 insertions(+), 8 deletions(-)
>
> diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
> index 96f1786c72fe..b23d3dc8865e 100644
> --- a/arch/arm64/kvm/mmu.c
> +++ b/arch/arm64/kvm/mmu.c
> @@ -2012,6 +2012,9 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
> u32 esr;
>
> ret = kvm_walk_nested_s2(vcpu, fault_ipa, &nested_trans);
> + if (ret == -EAGAIN)
> + return 1;
> +
> if (ret) {
> esr = kvm_s2_trans_esr(&nested_trans);
> kvm_inject_s2_fault(vcpu, esr);
> diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
> index b2fb3d7c9c19..7293769dacf5 100644
> --- a/arch/arm64/kvm/nested.c
> +++ b/arch/arm64/kvm/nested.c
> @@ -124,13 +124,14 @@ int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu)
> }
>
> struct s2_walk_info {
> - void *data;
> - u64 baddr;
> - unsigned int max_oa_bits;
> - unsigned int pgshift;
> - unsigned int sl;
> - unsigned int t0sz;
> - bool be;
> + void *data;
> + u64 baddr;
> + unsigned int max_oa_bits;
> + unsigned int pgshift;
> + unsigned int sl;
> + unsigned int t0sz;
> + bool be;
> + bool ha;
> };
>
> static u32 compute_fsc(int level, u32 fsc)
> @@ -220,6 +221,22 @@ static int read_guest_s2_desc(phys_addr_t pa, u64 *desc, struct s2_walk_info *wi
> return 0;
> }
>
> +static int swap_guest_s2_desc(phys_addr_t pa, u64 old, u64 new,
> + struct s2_walk_info *wi)
> +{
> + struct kvm_vcpu *vcpu = wi->data;
> +
> + if (wi->be) {
> + old = cpu_to_be64(old);
> + new = cpu_to_be64(new);
> + } else {
> + old = cpu_to_le64(old);
> + new = cpu_to_le64(new);
> + }
> +
> + return __kvm_at_swap_desc(vcpu->kvm, pa, old, new);
> +}
> +
> /*
> * This is essentially a C-version of the pseudo code from the ARM ARM
> * AArch64.TranslationTableWalk function. I strongly recommend looking at
> @@ -233,7 +250,7 @@ static int walk_nested_s2_pgd(phys_addr_t ipa,
> int first_block_level, level, stride, input_size, base_lower_bound;
> phys_addr_t base_addr;
> unsigned int addr_top, addr_bottom;
> - u64 desc; /* page table entry */
> + u64 desc, new_desc; /* page table entry */
> int ret;
> phys_addr_t paddr;
>
> @@ -326,6 +343,17 @@ static int walk_nested_s2_pgd(phys_addr_t ipa,
> return 1;
> }
>
> + if (wi->ha)
> + new_desc |= KVM_PTE_LEAF_ATTR_LO_S2_AF;
Same issue here (new_desc not initialised).
Thanks,
M.
--
Without deviation from the norm, progress is not possible.
^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH 12/12] KVM: arm64: nv: Expose hardware access flag management to NV guests
2025-11-12 18:33 [PATCH 00/12] KVM: arm64: nv: Implement FEAT_XNX and FEAT_HAF Oliver Upton
` (10 preceding siblings ...)
2025-11-12 18:34 ` [PATCH 11/12] KVM: arm64: nv: Implement HW access flag management in stage-2 " Oliver Upton
@ 2025-11-12 18:34 ` Oliver Upton
2025-11-17 15:21 ` [PATCH 00/12] KVM: arm64: nv: Implement FEAT_XNX and FEAT_HAF Marc Zyngier
12 siblings, 0 replies; 19+ messages in thread
From: Oliver Upton @ 2025-11-12 18:34 UTC (permalink / raw)
To: kvmarm; +Cc: Marc Zyngier, Joey Gouly, Suzuki K Poulose, Zenghui Yu,
Oliver Upton
Everything is in place to update the access flag at S1 and S2. Expose
support for the access flag flavor of FEAT_HAFDBS to NV guests.
Signed-off-by: Oliver Upton <oupton@kernel.org>
---
arch/arm64/kvm/nested.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index 7293769dacf5..2914fd20ad1f 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -1601,11 +1601,13 @@ u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val)
case SYS_ID_AA64MMFR1_EL1:
val &= ~(ID_AA64MMFR1_EL1_CMOW |
ID_AA64MMFR1_EL1_nTLBPA |
- ID_AA64MMFR1_EL1_ETS |
- ID_AA64MMFR1_EL1_HAFDBS);
+ ID_AA64MMFR1_EL1_ETS);
+
/* FEAT_E2H0 implies no VHE */
if (test_bit(KVM_ARM_VCPU_HAS_EL2_E2H0, kvm->arch.vcpu_features))
val &= ~ID_AA64MMFR1_EL1_VH;
+
+ val = ID_REG_LIMIT_FIELD_ENUM(val, ID_AA64MMFR1_EL1, HAFDBS, AF);
break;
case SYS_ID_AA64MMFR2_EL1:
--
2.47.3
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [PATCH 00/12] KVM: arm64: nv: Implement FEAT_XNX and FEAT_HAF
2025-11-12 18:33 [PATCH 00/12] KVM: arm64: nv: Implement FEAT_XNX and FEAT_HAF Oliver Upton
` (11 preceding siblings ...)
2025-11-12 18:34 ` [PATCH 12/12] KVM: arm64: nv: Expose hardware access flag management to NV guests Oliver Upton
@ 2025-11-17 15:21 ` Marc Zyngier
12 siblings, 0 replies; 19+ messages in thread
From: Marc Zyngier @ 2025-11-17 15:21 UTC (permalink / raw)
To: Oliver Upton; +Cc: kvmarm, Joey Gouly, Suzuki K Poulose, Zenghui Yu
On Wed, 12 Nov 2025 18:33:54 +0000,
Oliver Upton <oupton@kernel.org> wrote:
>
> This series closes a couple of gaps between our shadow stage-2
> implementation and the architecture:
>
> FEAT_XNX - KVM doesn't make use of the feature for itself but this is
> a rather low hanging fruit which entails expressing privileged and
> unprivileged execute permissions in our pseudo-TLB
I think this part looks good, no objection from my end.
>
> FEAT_HAF - This one is a bit more involved, requiring the PTW
> implementations to atomically update descriptors in user memory to set
> the Access Flag. I've made the implementation choice that AT
> instructions also update the AF which was done to avoid evaluating the
> access type in the PTW.
This one needs a bit more work. There are a couple of amusing bugs that
need addressing. Hopefully you can respin it shortly so that we can
have this series in 6.19.
> There's more work to be done for FEAT_HAFDBS (dirty state updates),
> since updates to DBM are conditional based on the walk context. Lastly,
> this is all horribly untested since I don't have an NV-capable machine
> with FEAT_HAF that is easy to hack on. Although I do plan on adding some
> selftests coverage soon.
Yup, it looks like systems accessible to the common mortals don't have
any of it (my QC box doesn't either). Oh well...
Thanks,
M.
--
Without deviation from the norm, progress is not possible.
^ permalink raw reply [flat|nested] 19+ messages in thread