[PATCH v3 0/3] KVM: arm64: fix pKVM mapping cache corner cases

Linux-ARM-Kernel Archive on lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH v3 0/3] KVM: arm64: fix pKVM mapping cache corner cases
@ 2026-06-24 16:00 Bradley Morgan
  2026-06-24 16:00 ` [PATCH v3 1/3] KVM: arm64: skip pKVM cache flushes for non cacheable mappings Bradley Morgan
                   ` (2 more replies)
  0 siblings, 3 replies; 16+ messages in thread
From: Bradley Morgan @ 2026-06-24 16:00 UTC (permalink / raw)
  To: Marc Zyngier, Oliver Upton
  Cc: Fuad Tabba, Joey Gouly, Steffen Eiden, Suzuki K Poulose,
	Zenghui Yu, Catalin Marinas, Will Deacon, Quentin Perret,
	Vincent Donnefort, Gavin Shan, Alexandru Elisei, linux-arm-kernel,
	kvmarm, linux-kernel, Bradley Morgan

This is a standalone v3.

Patch 1 fixes pKVM cache maintenance for non cacheable mappings without
growing struct pkvm_mapping.

Patch 2 fixes a pKVM mapping cache topup bug on permission faults that
replace page mappings with a PMD mapping.

Patch 3 fixes the generic dirty logging case where a permission fault
can still need a page table allocation to split a block mapping.

Changes in v3:
- Send as a standalone series with a cover letter.
- Store the pKVM cacheable bit in nr_pages instead of adding a bool.
- Drop stable from patch 1.
- Add patch 3 for dirty logging permission faults.

Changes in v2:
- Add patch 2 for the pKVM permission fault mapping cache bug.

Bradley Morgan (3):
  KVM: arm64: skip pKVM cache flushes for non cacheable mappings
  KVM: arm64: top up pKVM mapping cache for permission faults
  KVM: arm64: top up stage 2 memcache for dirty logging faults

 arch/arm64/kvm/mmu.c  | 32 +++++++++++++++++++--------
 arch/arm64/kvm/pkvm.c | 51 ++++++++++++++++++++++++++++++++++---------
 2 files changed, 64 insertions(+), 19 deletions(-)

-- 
2.53.0


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v3 1/3] KVM: arm64: skip pKVM cache flushes for non cacheable mappings
  2026-06-24 16:00 [PATCH v3 0/3] KVM: arm64: fix pKVM mapping cache corner cases Bradley Morgan
@ 2026-06-24 16:00 ` Bradley Morgan
  2026-07-01 13:31   ` Vincent Donnefort
  2026-07-01 16:05   ` Leonardo Bras
  2026-06-24 16:00 ` [PATCH v3 2/3] KVM: arm64: top up pKVM mapping cache for permission faults Bradley Morgan
  2026-06-24 16:00 ` [PATCH v3 3/3] KVM: arm64: top up stage 2 memcache for dirty logging faults Bradley Morgan
  2 siblings, 2 replies; 16+ messages in thread
From: Bradley Morgan @ 2026-06-24 16:00 UTC (permalink / raw)
  To: Marc Zyngier, Oliver Upton
  Cc: Fuad Tabba, Joey Gouly, Steffen Eiden, Suzuki K Poulose,
	Zenghui Yu, Catalin Marinas, Will Deacon, Quentin Perret,
	Vincent Donnefort, Gavin Shan, Alexandru Elisei, linux-arm-kernel,
	kvmarm, linux-kernel, Bradley Morgan

pKVM keeps its own mapping list for stage 2 operations. Its flush path
uses that list directly, so it lost the PTE attribute check done by the
generic stage 2 walker.

Record whether a mapping is cacheable and skip cache maintenance for
mappings that are not cacheable.

Fixes: e912efed485a ("KVM: arm64: Introduce the EL1 pKVM MMU")
Signed-off-by: Bradley Morgan <include@grrlz.net>
---
 arch/arm64/kvm/pkvm.c | 51 ++++++++++++++++++++++++++++++++++---------
 1 file changed, 41 insertions(+), 10 deletions(-)

diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
index 428723b1b0f5..ca6e823028c2 100644
--- a/arch/arm64/kvm/pkvm.c
+++ b/arch/arm64/kvm/pkvm.c
@@ -302,9 +302,32 @@ static u64 __pkvm_mapping_start(struct pkvm_mapping *m)
 	return m->gfn * PAGE_SIZE;
 }
 
+#define PKVM_MAPPING_NR_PAGES_MASK	GENMASK_ULL(47, 0)
+#define PKVM_MAPPING_CACHEABLE		BIT_ULL(48)
+
+static u64 pkvm_mapping_nr_pages(struct pkvm_mapping *m)
+{
+	return m->nr_pages & PKVM_MAPPING_NR_PAGES_MASK;
+}
+
+static bool pkvm_mapping_is_cacheable(struct pkvm_mapping *m)
+{
+	return m->nr_pages & PKVM_MAPPING_CACHEABLE;
+}
+
+static void pkvm_mapping_set_nr_pages(struct pkvm_mapping *m, u64 nr_pages,
+				      bool cacheable)
+{
+	WARN_ON_ONCE(nr_pages & ~PKVM_MAPPING_NR_PAGES_MASK);
+
+	m->nr_pages = nr_pages & PKVM_MAPPING_NR_PAGES_MASK;
+	if (cacheable)
+		m->nr_pages |= PKVM_MAPPING_CACHEABLE;
+}
+
 static u64 __pkvm_mapping_end(struct pkvm_mapping *m)
 {
-	return (m->gfn + m->nr_pages) * PAGE_SIZE - 1;
+	return (m->gfn + pkvm_mapping_nr_pages(m)) * PAGE_SIZE - 1;
 }
 
 INTERVAL_TREE_DEFINE(struct pkvm_mapping, node, u64, __subtree_last,
@@ -350,7 +373,7 @@ static int __pkvm_pgtable_stage2_reclaim(struct kvm_pgtable *pgt, u64 start, u64
 			continue;
 
 		page = pfn_to_page(mapping->pfn);
-		WARN_ON_ONCE(mapping->nr_pages != 1);
+		WARN_ON_ONCE(pkvm_mapping_nr_pages(mapping) != 1);
 		unpin_user_pages_dirty_lock(&page, 1, true);
 		account_locked_vm(kvm->mm, 1, false);
 		pkvm_mapping_remove(mapping, &pgt->pkvm_mappings);
@@ -369,7 +392,7 @@ static int __pkvm_pgtable_stage2_unshare(struct kvm_pgtable *pgt, u64 start, u64
 
 	for_each_mapping_in_range_safe(pgt, start, end, mapping) {
 		ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_guest, handle, mapping->gfn,
-					mapping->nr_pages);
+					pkvm_mapping_nr_pages(mapping));
 		if (WARN_ON(ret))
 			return ret;
 		pkvm_mapping_remove(mapping, &pgt->pkvm_mappings);
@@ -448,7 +471,7 @@ int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
 		 * permission faults are handled in the relax_perms() path.
 		 */
 		if (mapping) {
-			if (size == (mapping->nr_pages * PAGE_SIZE))
+			if (size == (pkvm_mapping_nr_pages(mapping) * PAGE_SIZE))
 				return -EAGAIN;
 
 			/*
@@ -472,7 +495,9 @@ int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
 	swap(mapping, cache->mapping);
 	mapping->gfn = gfn;
 	mapping->pfn = pfn;
-	mapping->nr_pages = size / PAGE_SIZE;
+	pkvm_mapping_set_nr_pages(mapping, size / PAGE_SIZE,
+				  !(prot & (KVM_PGTABLE_PROT_DEVICE |
+					    KVM_PGTABLE_PROT_NORMAL_NC)));
 	pkvm_mapping_insert(mapping, &pgt->pkvm_mappings);
 
 	return ret;
@@ -503,7 +528,7 @@ int pkvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
 	lockdep_assert_held(&kvm->mmu_lock);
 	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
 		ret = kvm_call_hyp_nvhe(__pkvm_host_wrprotect_guest, handle, mapping->gfn,
-					mapping->nr_pages);
+					pkvm_mapping_nr_pages(mapping));
 		if (WARN_ON(ret))
 			break;
 	}
@@ -517,9 +542,13 @@ int pkvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
 	struct pkvm_mapping *mapping;
 
 	lockdep_assert_held(&kvm->mmu_lock);
-	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
+	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
+		if (!pkvm_mapping_is_cacheable(mapping))
+			continue;
+
 		__clean_dcache_guest_page(pfn_to_kaddr(mapping->pfn),
-					  PAGE_SIZE * mapping->nr_pages);
+					  PAGE_SIZE * pkvm_mapping_nr_pages(mapping));
+	}
 
 	return 0;
 }
@@ -536,8 +565,10 @@ bool pkvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr, u64
 
 	lockdep_assert_held(&kvm->mmu_lock);
 	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
-		young |= kvm_call_hyp_nvhe(__pkvm_host_test_clear_young_guest, handle, mapping->gfn,
-					   mapping->nr_pages, mkold);
+		young |= kvm_call_hyp_nvhe(__pkvm_host_test_clear_young_guest,
+					   handle, mapping->gfn,
+					   pkvm_mapping_nr_pages(mapping),
+					   mkold);
 
 	return young;
 }
-- 
2.53.0



^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v3 2/3] KVM: arm64: top up pKVM mapping cache for permission faults
  2026-06-24 16:00 [PATCH v3 0/3] KVM: arm64: fix pKVM mapping cache corner cases Bradley Morgan
  2026-06-24 16:00 ` [PATCH v3 1/3] KVM: arm64: skip pKVM cache flushes for non cacheable mappings Bradley Morgan
@ 2026-06-24 16:00 ` Bradley Morgan
  2026-06-24 16:00 ` [PATCH v3 3/3] KVM: arm64: top up stage 2 memcache for dirty logging faults Bradley Morgan
  2 siblings, 0 replies; 16+ messages in thread
From: Bradley Morgan @ 2026-06-24 16:00 UTC (permalink / raw)
  To: Marc Zyngier, Oliver Upton
  Cc: Fuad Tabba, Joey Gouly, Steffen Eiden, Suzuki K Poulose,
	Zenghui Yu, Catalin Marinas, Will Deacon, Quentin Perret,
	Vincent Donnefort, Gavin Shan, Alexandru Elisei, linux-arm-kernel,
	kvmarm, linux-kernel, Bradley Morgan, stable

Permission faults normally only relax an existing leaf, so the fault path
does not top up the memcache.

With pKVM, a permission fault can also replace page mappings with a
PMD mapping. That path needs a fresh pkvm_mapping object, and can
dereference a NULL cache->mapping if the cache was not topped up.

Allocate just that object for pKVM permission faults.

The issue was discovered [1] by Sashiko.

Link: https://lore.kernel.org/all/20260623161545.EA08E1F000E9@smtp.kernel.org/ [1]

Fixes: db14091d8f75 ("KVM: arm64: Stage-2 huge mappings for np-guests")
Cc: stable@vger.kernel.org
Signed-off-by: Bradley Morgan <include@grrlz.net>
---
 arch/arm64/kvm/mmu.c | 29 ++++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 6c941aaa10c6..3f57f6825a33 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1177,17 +1177,26 @@ void free_hyp_memcache(struct kvm_hyp_memcache *mc)
 	__free_hyp_memcache(mc, hyp_mc_free_fn, kvm_host_va, mc);
 }
 
+static int topup_hyp_memcache_mapping(struct kvm_hyp_memcache *mc)
+{
+	if (mc->mapping)
+		return 0;
+
+	mc->mapping = kzalloc_obj(struct pkvm_mapping,
+				  GFP_KERNEL_ACCOUNT);
+	return mc->mapping ? 0 : -ENOMEM;
+}
+
 int topup_hyp_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages)
 {
+	int ret;
+
 	if (!is_protected_kvm_enabled())
 		return 0;
 
-	if (!mc->mapping) {
-		mc->mapping = kzalloc_obj(struct pkvm_mapping,
-					  GFP_KERNEL_ACCOUNT);
-		if (!mc->mapping)
-			return -ENOMEM;
-	}
+	ret = topup_hyp_memcache_mapping(mc);
+	if (ret)
+		return ret;
 
 	return __topup_hyp_memcache(mc, min_pages, hyp_mc_alloc_fn,
 				    kvm_host_pa, mc);
@@ -2113,7 +2122,9 @@ static int user_mem_abort(const struct kvm_s2_fault_desc *s2fd)
 	 * Permission faults just need to update the existing leaf entry,
 	 * and so normally don't require allocations from the memcache. The
 	 * only exception to this is when dirty logging is enabled at runtime
-	 * and a write fault needs to collapse a block entry into a table.
+	 * and a write fault needs to collapse a block entry into a table. With
+	 * pKVM, they may still need a fresh mapping object if the fault turns
+	 * page entries into a block entry.
 	 */
 	memcache = get_mmu_memcache(s2fd->vcpu);
 	if (!perm_fault || (memslot_is_logging(s2fd->memslot) &&
@@ -2121,6 +2132,10 @@ static int user_mem_abort(const struct kvm_s2_fault_desc *s2fd)
 		ret = topup_mmu_memcache(s2fd->vcpu, memcache);
 		if (ret)
 			return ret;
+	} else if (is_protected_kvm_enabled()) {
+		ret = topup_hyp_memcache_mapping(memcache);
+		if (ret)
+			return ret;
 	}
 
 	/*
-- 
2.53.0



^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v3 3/3] KVM: arm64: top up stage 2 memcache for dirty logging faults
  2026-06-24 16:00 [PATCH v3 0/3] KVM: arm64: fix pKVM mapping cache corner cases Bradley Morgan
  2026-06-24 16:00 ` [PATCH v3 1/3] KVM: arm64: skip pKVM cache flushes for non cacheable mappings Bradley Morgan
  2026-06-24 16:00 ` [PATCH v3 2/3] KVM: arm64: top up pKVM mapping cache for permission faults Bradley Morgan
@ 2026-06-24 16:00 ` Bradley Morgan
  2026-06-24 17:39   ` Bradley Morgan
  2 siblings, 1 reply; 16+ messages in thread
From: Bradley Morgan @ 2026-06-24 16:00 UTC (permalink / raw)
  To: Marc Zyngier, Oliver Upton
  Cc: Fuad Tabba, Joey Gouly, Steffen Eiden, Suzuki K Poulose,
	Zenghui Yu, Catalin Marinas, Will Deacon, Quentin Perret,
	Vincent Donnefort, Gavin Shan, Alexandru Elisei, linux-arm-kernel,
	kvmarm, linux-kernel, Bradley Morgan, stable

Dirty logging forces new stage 2 mappings down to page size, but
it does not always remove an existing block mapping before the next
fault. Eager splitting is best effort and is disabled by default.

A permission fault on such a block can still need a page table page
to install the smaller mapping. Top up the memcache for any permission
fault while dirty logging is active, not only for write faults.

The issue was discovered [1] by Sashiko.

Link: https://lore.kernel.org/all/59984F6D-06F2-4302-BDD7-92DF334E8FA0@grrlz.net/T/#t [1]

Fixes: 6f745f1bb5bf ("KVM: arm64: Convert user_mem_abort() to generic page-table API")
Cc: stable@vger.kernel.org
Signed-off-by: Bradley Morgan <include@grrlz.net>
---
 arch/arm64/kvm/mmu.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 3f57f6825a33..8911e319e6fa 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -2122,13 +2122,12 @@ static int user_mem_abort(const struct kvm_s2_fault_desc *s2fd)
 	 * Permission faults just need to update the existing leaf entry,
 	 * and so normally don't require allocations from the memcache. The
 	 * only exception to this is when dirty logging is enabled at runtime
-	 * and a write fault needs to collapse a block entry into a table. With
-	 * pKVM, they may still need a fresh mapping object if the fault turns
-	 * page entries into a block entry.
+	 * and a fault needs to collapse a block entry into a table. With pKVM,
+	 * they may still need a fresh mapping object if the fault turns page
+	 * entries into a block entry.
 	 */
 	memcache = get_mmu_memcache(s2fd->vcpu);
-	if (!perm_fault || (memslot_is_logging(s2fd->memslot) &&
-			    kvm_is_write_fault(s2fd->vcpu))) {
+	if (!perm_fault || memslot_is_logging(s2fd->memslot)) {
 		ret = topup_mmu_memcache(s2fd->vcpu, memcache);
 		if (ret)
 			return ret;
-- 
2.53.0



^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH v3 3/3] KVM: arm64: top up stage 2 memcache for dirty logging faults
  2026-06-24 16:00 ` [PATCH v3 3/3] KVM: arm64: top up stage 2 memcache for dirty logging faults Bradley Morgan
@ 2026-06-24 17:39   ` Bradley Morgan
  2026-06-24 17:46     ` Bradley Morgan
  0 siblings, 1 reply; 16+ messages in thread
From: Bradley Morgan @ 2026-06-24 17:39 UTC (permalink / raw)
  To: Marc Zyngier, Oliver Upton
  Cc: Fuad Tabba, Joey Gouly, Steffen Eiden, Suzuki K Poulose,
	Zenghui Yu, Catalin Marinas, Will Deacon, Quentin Perret,
	Vincent Donnefort, Gavin Shan, Alexandru Elisei, linux-arm-kernel,
	kvmarm, linux-kernel, stable

On June 24, 2026 5:00:28 PM GMT+01:00, Bradley Morgan <include@grrlz.net>
wrote:
>Dirty logging forces new stage 2 mappings down to page size, but
>it does not always remove an existing block mapping before the next
>fault. Eager splitting is best effort and is disabled by default.
>
>A permission fault on such a block can still need a page table page
>to install the smaller mapping. Top up the memcache for any permission
>fault while dirty logging is active, not only for write faults.
>
>The issue was discovered [1] by Sashiko.
>
>Link: https://lore.kernel.org/all/59984F6D-06F2-4302-BDD7-92DF334E8FA0@grrlz.net/T/#t [1]
>
>Fixes: 6f745f1bb5bf ("KVM: arm64: Convert user_mem_abort() to generic page-table API")
>Cc: stable@vger.kernel.org
>Signed-off-by: Bradley Morgan <include@grrlz.net>
>---
> arch/arm64/kvm/mmu.c | 9 ++++-----
> 1 file changed, 4 insertions(+), 5 deletions(-)
>
>diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
>index 3f57f6825a33..8911e319e6fa 100644
>--- a/arch/arm64/kvm/mmu.c
>+++ b/arch/arm64/kvm/mmu.c
>@@ -2122,13 +2122,12 @@ static int user_mem_abort(const struct kvm_s2_fault_desc *s2fd)
> 	 * Permission faults just need to update the existing leaf entry,
> 	 * and so normally don't require allocations from the memcache. The
> 	 * only exception to this is when dirty logging is enabled at runtime
>-	 * and a write fault needs to collapse a block entry into a table. With
>-	 * pKVM, they may still need a fresh mapping object if the fault turns
>-	 * page entries into a block entry.
>+	 * and a fault needs to collapse a block entry into a table. With pKVM,
>+	 * they may still need a fresh mapping object if the fault turns page
>+	 * entries into a block entry.
> 	 */
> 	memcache = get_mmu_memcache(s2fd->vcpu);
>-	if (!perm_fault || (memslot_is_logging(s2fd->memslot) &&
>-			    kvm_is_write_fault(s2fd->vcpu))) {
>+	if (!perm_fault || memslot_is_logging(s2fd->memslot)) {
> 		ret = topup_mmu_memcache(s2fd->vcpu, memcache);
> 		if (ret)
> 			return ret;
>

Note: Patch 3 seems to conflict because of patch 2 (the comments)


Oops! :(

V4 (once people have had a chance to review) will contain one commit
(patch 3) with the updated comments.

Patches 1 and 2 apply as usual.

Apologies for my mess-up.

Thanks!


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v3 3/3] KVM: arm64: top up stage 2 memcache for dirty logging faults
  2026-06-24 17:39   ` Bradley Morgan
@ 2026-06-24 17:46     ` Bradley Morgan
  2026-06-24 18:25       ` Marc Zyngier
  0 siblings, 1 reply; 16+ messages in thread
From: Bradley Morgan @ 2026-06-24 17:46 UTC (permalink / raw)
  To: Marc Zyngier, Oliver Upton
  Cc: Fuad Tabba, Joey Gouly, Steffen Eiden, Suzuki K Poulose,
	Zenghui Yu, Catalin Marinas, Will Deacon, Quentin Perret,
	Vincent Donnefort, Gavin Shan, Alexandru Elisei, linux-arm-kernel,
	kvmarm, linux-kernel, stable

On June 24, 2026 6:39:16 PM GMT+01:00, Bradley Morgan <include@grrlz.net>
wrote:
>On June 24, 2026 5:00:28 PM GMT+01:00, Bradley Morgan <include@grrlz.net>
>wrote:
>>Dirty logging forces new stage 2 mappings down to page size, but
>>it does not always remove an existing block mapping before the next
>>fault. Eager splitting is best effort and is disabled by default.
>>
>>A permission fault on such a block can still need a page table page
>>to install the smaller mapping. Top up the memcache for any permission
>>fault while dirty logging is active, not only for write faults.
>>
>>The issue was discovered [1] by Sashiko.
>>
>>Link:
>https://lore.kernel.org/all/59984F6D-06F2-4302-BDD7-92DF334E8FA0@grrlz.net/T/#t
>[1]
>>
>>Fixes: 6f745f1bb5bf ("KVM: arm64: Convert user_mem_abort() to generic
>page-table API")
>>Cc: stable@vger.kernel.org
>>Signed-off-by: Bradley Morgan <include@grrlz.net>
>>---
>> arch/arm64/kvm/mmu.c | 9 ++++-----
>> 1 file changed, 4 insertions(+), 5 deletions(-)
>>
>>diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
>>index 3f57f6825a33..8911e319e6fa 100644
>>--- a/arch/arm64/kvm/mmu.c
>>+++ b/arch/arm64/kvm/mmu.c
>>@@ -2122,13 +2122,12 @@ static int user_mem_abort(const struct
>kvm_s2_fault_desc *s2fd)
>> 	 * Permission faults just need to update the existing leaf entry,
>> 	 * and so normally don't require allocations from the memcache. The
>> 	 * only exception to this is when dirty logging is enabled at runtime
>>-	 * and a write fault needs to collapse a block entry into a table. With
>>-	 * pKVM, they may still need a fresh mapping object if the fault turns
>>-	 * page entries into a block entry.
>>+	 * and a fault needs to collapse a block entry into a table. With pKVM,
>>+	 * they may still need a fresh mapping object if the fault turns page
>>+	 * entries into a block entry.
>> 	 */
>> 	memcache = get_mmu_memcache(s2fd->vcpu);
>>-	if (!perm_fault || (memslot_is_logging(s2fd->memslot) &&
>>-			    kvm_is_write_fault(s2fd->vcpu))) {
>>+	if (!perm_fault || memslot_is_logging(s2fd->memslot)) {
>> 		ret = topup_mmu_memcache(s2fd->vcpu, memcache);
>> 		if (ret)
>> 			return ret;
>>
>
>Note: Patch 3 seems to conflict because of patch 2 (the comments)
>
>
>Oops! :(
>
>V4 (after people have their review go), will contain one commit (patch
>3) with the updated comments.
>
>Patch 1 and 2 applies as usual.
>
>Apologies for my messup. 
>
>Thanks!


Actually. Hmm.

I'll just drop patches 2 and 3 and do them at a later date. Please
disregard them; patch 1 doesn't rely on either of them.

If you guys wanna have a look feel free! :)

Thanks!


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v3 3/3] KVM: arm64: top up stage 2 memcache for dirty logging faults
  2026-06-24 17:46     ` Bradley Morgan
@ 2026-06-24 18:25       ` Marc Zyngier
  2026-06-24 18:37         ` Bradley Morgan
  0 siblings, 1 reply; 16+ messages in thread
From: Marc Zyngier @ 2026-06-24 18:25 UTC (permalink / raw)
  To: Bradley Morgan
  Cc: Oliver Upton, Fuad Tabba, Joey Gouly, Steffen Eiden,
	Suzuki K Poulose, Zenghui Yu, Catalin Marinas, Will Deacon,
	Quentin Perret, Vincent Donnefort, Gavin Shan, Alexandru Elisei,
	linux-arm-kernel, kvmarm, linux-kernel, stable

On Wed, 24 Jun 2026 18:46:10 +0100,
Bradley Morgan <include@grrlz.net> wrote:
> 
> On June 24, 2026 6:39:16 PM GMT+01:00, Bradley Morgan <include@grrlz.net>
> wrote:
> >
> >Note: Patch 3 seems to conflict because of patch 2 (the comments)
> >
> >
> >Oops! :(
> >
> >V4 (after people have their review go), will contain one commit (patch
> >3) with the updated comments.
> >
> >Patch 1 and 2 applies as usual.
> >
> >Apologies for my messup. 
> >
> >Thanks!
> 
> 
> Actually. Hmm.
> 
> I'll just drop patches 2 and 3, I'll do them at a later date, please
> disregard patches 2 and 3, patch 1 doesn't rely on 2 and 3..
> 
> If you guys wanna have a look feel free! :)

As I suggested in my reply to your hasty v2, taking a few *days*
between versions is generally a good thing. It gives the reviewers
time to chime in, and gives you the opportunity to reflect on what
you've just written (reading your own patches after a few days is a
sure way to go and rewrite them).

Actually, by posting more often, you are guaranteeing that people
*avoid* reviewing your stuff, since odds are that there is a new
version coming in the next 10 minutes, so why bother...

But hey, that's free advice, so it's probably worthless.

	M.

-- 
Without deviation from the norm, progress is not possible.


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v3 3/3] KVM: arm64: top up stage 2 memcache for dirty logging faults
  2026-06-24 18:25       ` Marc Zyngier
@ 2026-06-24 18:37         ` Bradley Morgan
  0 siblings, 0 replies; 16+ messages in thread
From: Bradley Morgan @ 2026-06-24 18:37 UTC (permalink / raw)
  To: Marc Zyngier
  Cc: Oliver Upton, Fuad Tabba, Joey Gouly, Steffen Eiden,
	Suzuki K Poulose, Zenghui Yu, Catalin Marinas, Will Deacon,
	Quentin Perret, Vincent Donnefort, Gavin Shan, Alexandru Elisei,
	linux-arm-kernel, kvmarm, linux-kernel, stable

On June 24, 2026 7:25:04 PM GMT+01:00, Marc Zyngier <maz@kernel.org> wrote:
>On Wed, 24 Jun 2026 18:46:10 +0100,
>Bradley Morgan <include@grrlz.net> wrote:
>> 
>> On June 24, 2026 6:39:16 PM GMT+01:00, Bradley Morgan
><include@grrlz.net>
>> wrote:
>> >
>> >Note: Patch 3 seems to conflict because of patch 2 (the comments)
>> >
>> >
>> >Oops! :(
>> >
>> >V4 (after people have their review go), will contain one commit (patch
>> >3) with the updated comments.
>> >
>> >Patch 1 and 2 applies as usual.
>> >
>> >Apologies for my messup. 
>> >
>> >Thanks!
>> 
>> 
>> Actually. Hmm.
>> 
>> I'll just drop patches 2 and 3, I'll do them at a later date, please
>> disregard patches 2 and 3, patch 1 doesn't rely on 2 and 3..
>> 
>> If you guys wanna have a look feel free! :)
>
>As I suggested in my reply to your hasty v2, taking a few *days*
>between versions is generally a good thing. it gives the reviewers
>time to chime in, and gives you the opportunity to reflect on what
>you've just written (reading your own patches after a few days is a
>sure way to go and rewrite them).

welp, I guess I learned the hard way with the hastiness of my V3, (that's
kinda why I dropped p2 and p3)


>Actually, by posting more often, you are guaranteeing that people
>*avoid* reviewing your stuff, since odds are that there is a new
>version coming in the next 10 minutes, so why bother...

I'm not planning on posting another version until a fuller review concludes.


>But hey, that's free advice, so it's probably worthless.

advice from someone with experience isn't worthless. 

just stupid me decided to unpark my V3 and send it :(

Sorry about that..

>	M.
>
>

Thanks!


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v3 1/3] KVM: arm64: skip pKVM cache flushes for non cacheable mappings
  2026-06-24 16:00 ` [PATCH v3 1/3] KVM: arm64: skip pKVM cache flushes for non cacheable mappings Bradley Morgan
@ 2026-07-01 13:31   ` Vincent Donnefort
  2026-07-01 16:05   ` Leonardo Bras
  1 sibling, 0 replies; 16+ messages in thread
From: Vincent Donnefort @ 2026-07-01 13:31 UTC (permalink / raw)
  To: Bradley Morgan
  Cc: Marc Zyngier, Oliver Upton, Fuad Tabba, Joey Gouly, Steffen Eiden,
	Suzuki K Poulose, Zenghui Yu, Catalin Marinas, Will Deacon,
	Quentin Perret, Gavin Shan, Alexandru Elisei, linux-arm-kernel,
	kvmarm, linux-kernel

On Wed, Jun 24, 2026 at 04:00:26PM +0000, Bradley Morgan wrote:
> pKVM keeps its own mapping list for stage 2 operations. Its flush path
> uses that list directly, so it lost the PTE attribute check done by the
> generic stage 2 walker.
> 
> Record whether a mapping is cacheable and skip cache maintenance for
> mappings that are not cacheable.
> 
> Fixes: e912efed485a ("KVM: arm64: Introduce the EL1 pKVM MMU")
> Signed-off-by: Bradley Morgan <include@grrlz.net>
> ---
>  arch/arm64/kvm/pkvm.c | 51 ++++++++++++++++++++++++++++++++++---------
>  1 file changed, 41 insertions(+), 10 deletions(-)
> 
> diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
> index 428723b1b0f5..ca6e823028c2 100644
> --- a/arch/arm64/kvm/pkvm.c
> +++ b/arch/arm64/kvm/pkvm.c
> @@ -302,9 +302,32 @@ static u64 __pkvm_mapping_start(struct pkvm_mapping *m)
>  	return m->gfn * PAGE_SIZE;
>  }
>  
> +#define PKVM_MAPPING_NR_PAGES_MASK	GENMASK_ULL(47, 0)
> +#define PKVM_MAPPING_CACHEABLE		BIT_ULL(48)

Probably better to make it "_NC". Protected VMs only support cacheable
mappings, and they also use struct pkvm_mapping.

> +
> +static u64 pkvm_mapping_nr_pages(struct pkvm_mapping *m)
> +{
> +	return m->nr_pages & PKVM_MAPPING_NR_PAGES_MASK;
> +}
> +
> +static bool pkvm_mapping_is_cacheable(struct pkvm_mapping *m)
> +{
> +	return m->nr_pages & PKVM_MAPPING_CACHEABLE;
> +}
> +
> +static void pkvm_mapping_set_nr_pages(struct pkvm_mapping *m, u64 nr_pages,
> +				      bool cacheable)
> +{
> +	WARN_ON_ONCE(nr_pages & ~PKVM_MAPPING_NR_PAGES_MASK);
> +
> +	m->nr_pages = nr_pages & PKVM_MAPPING_NR_PAGES_MASK;
> +	if (cacheable)
> +		m->nr_pages |= PKVM_MAPPING_CACHEABLE;
> +}
> +
>  static u64 __pkvm_mapping_end(struct pkvm_mapping *m)
>  {
> -	return (m->gfn + m->nr_pages) * PAGE_SIZE - 1;
> +	return (m->gfn + pkvm_mapping_nr_pages(m)) * PAGE_SIZE - 1;
>  }

Perhaps using a bitfield would heavily simplify this code?

struct pkvm_mapping {
  ...
  u64 nr_pages : 63;
  u64 flags : 1;
};

Or alternatively, could just make nr_pages u32 and flags u32. nr_pages will not
exceed PMD_SIZE / PAGE_SIZE, which is at worst 8192 on 64K systems.

>  
>  INTERVAL_TREE_DEFINE(struct pkvm_mapping, node, u64, __subtree_last,
> @@ -350,7 +373,7 @@ static int __pkvm_pgtable_stage2_reclaim(struct kvm_pgtable *pgt, u64 start, u64
>  			continue;
>  
>  		page = pfn_to_page(mapping->pfn);
> -		WARN_ON_ONCE(mapping->nr_pages != 1);
> +		WARN_ON_ONCE(pkvm_mapping_nr_pages(mapping) != 1);
>  		unpin_user_pages_dirty_lock(&page, 1, true);
>  		account_locked_vm(kvm->mm, 1, false);
>  		pkvm_mapping_remove(mapping, &pgt->pkvm_mappings);
> @@ -369,7 +392,7 @@ static int __pkvm_pgtable_stage2_unshare(struct kvm_pgtable *pgt, u64 start, u64
>  
>  	for_each_mapping_in_range_safe(pgt, start, end, mapping) {
>  		ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_guest, handle, mapping->gfn,
> -					mapping->nr_pages);
> +					pkvm_mapping_nr_pages(mapping));
>  		if (WARN_ON(ret))
>  			return ret;
>  		pkvm_mapping_remove(mapping, &pgt->pkvm_mappings);
> @@ -448,7 +471,7 @@ int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
>  		 * permission faults are handled in the relax_perms() path.
>  		 */
>  		if (mapping) {
> -			if (size == (mapping->nr_pages * PAGE_SIZE))
> +			if (size == (pkvm_mapping_nr_pages(mapping) * PAGE_SIZE))
>  				return -EAGAIN;
>  
>  			/*
> @@ -472,7 +495,9 @@ int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
>  	swap(mapping, cache->mapping);
>  	mapping->gfn = gfn;
>  	mapping->pfn = pfn;
> -	mapping->nr_pages = size / PAGE_SIZE;
> +	pkvm_mapping_set_nr_pages(mapping, size / PAGE_SIZE,
> +				  !(prot & (KVM_PGTABLE_PROT_DEVICE |
> +					    KVM_PGTABLE_PROT_NORMAL_NC)));
>  	pkvm_mapping_insert(mapping, &pgt->pkvm_mappings);
>  
>  	return ret;
> @@ -503,7 +528,7 @@ int pkvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
>  	lockdep_assert_held(&kvm->mmu_lock);
>  	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
>  		ret = kvm_call_hyp_nvhe(__pkvm_host_wrprotect_guest, handle, mapping->gfn,
> -					mapping->nr_pages);
> +					pkvm_mapping_nr_pages(mapping));
>  		if (WARN_ON(ret))
>  			break;
>  	}
> @@ -517,9 +542,13 @@ int pkvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
>  	struct pkvm_mapping *mapping;
>  
>  	lockdep_assert_held(&kvm->mmu_lock);
> -	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
> +	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
> +		if (!pkvm_mapping_is_cacheable(mapping))
> +			continue;
> +
>  		__clean_dcache_guest_page(pfn_to_kaddr(mapping->pfn),
> -					  PAGE_SIZE * mapping->nr_pages);
> +					  PAGE_SIZE * pkvm_mapping_nr_pages(mapping));
> +	}
>  
>  	return 0;
>  }
> @@ -536,8 +565,10 @@ bool pkvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr, u64
>  
>  	lockdep_assert_held(&kvm->mmu_lock);
>  	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
> -		young |= kvm_call_hyp_nvhe(__pkvm_host_test_clear_young_guest, handle, mapping->gfn,
> -					   mapping->nr_pages, mkold);
> +		young |= kvm_call_hyp_nvhe(__pkvm_host_test_clear_young_guest,
> +					   handle, mapping->gfn,
> +					   pkvm_mapping_nr_pages(mapping),
> +					   mkold);
>  
>  	return young;
>  }
> -- 
> 2.53.0
> 


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v3 1/3] KVM: arm64: skip pKVM cache flushes for non cacheable mappings
  2026-06-24 16:00 ` [PATCH v3 1/3] KVM: arm64: skip pKVM cache flushes for non cacheable mappings Bradley Morgan
  2026-07-01 13:31   ` Vincent Donnefort
@ 2026-07-01 16:05   ` Leonardo Bras
  2026-07-01 16:40     ` Bradley Morgan
  1 sibling, 1 reply; 16+ messages in thread
From: Leonardo Bras @ 2026-07-01 16:05 UTC (permalink / raw)
  To: Bradley Morgan
  Cc: Leonardo Bras, Marc Zyngier, Oliver Upton, Fuad Tabba, Joey Gouly,
	Steffen Eiden, Suzuki K Poulose, Zenghui Yu, Catalin Marinas,
	Will Deacon, Quentin Perret, Vincent Donnefort, Gavin Shan,
	Alexandru Elisei, linux-arm-kernel, kvmarm, linux-kernel

On Wed, Jun 24, 2026 at 04:00:26PM +0000, Bradley Morgan wrote:
> pKVM keeps its own mapping list for stage 2 operations. Its flush path
> uses that list directly, so it lost the PTE attribute check done by the
> generic stage 2 walker.
> 
> Record whether a mapping is cacheable and skip cache maintenance for
> mappings that are not cacheable.
> 
> Fixes: e912efed485a ("KVM: arm64: Introduce the EL1 pKVM MMU")
> Signed-off-by: Bradley Morgan <include@grrlz.net>
> ---
>  arch/arm64/kvm/pkvm.c | 51 ++++++++++++++++++++++++++++++++++---------
>  1 file changed, 41 insertions(+), 10 deletions(-)
> 
> diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
> index 428723b1b0f5..ca6e823028c2 100644
> --- a/arch/arm64/kvm/pkvm.c
> +++ b/arch/arm64/kvm/pkvm.c
> @@ -302,9 +302,32 @@ static u64 __pkvm_mapping_start(struct pkvm_mapping *m)
>  	return m->gfn * PAGE_SIZE;
>  }
>  
> +#define PKVM_MAPPING_NR_PAGES_MASK	GENMASK_ULL(47, 0)
> +#define PKVM_MAPPING_CACHEABLE		BIT_ULL(48)

Out of curiosity, why did you choose to use bit 48 here instead of,
let's say, bit 63?

(I know it makes absolutely no difference to the inner workings here, as
there should probably not be 2^48 pages in one mapping.)

Thanks!
Leo

> +
> +static u64 pkvm_mapping_nr_pages(struct pkvm_mapping *m)
> +{
> +	return m->nr_pages & PKVM_MAPPING_NR_PAGES_MASK;
> +}
> +
> +static bool pkvm_mapping_is_cacheable(struct pkvm_mapping *m)
> +{
> +	return m->nr_pages & PKVM_MAPPING_CACHEABLE;
> +}
> +
> +static void pkvm_mapping_set_nr_pages(struct pkvm_mapping *m, u64 nr_pages,
> +				      bool cacheable)
> +{
> +	WARN_ON_ONCE(nr_pages & ~PKVM_MAPPING_NR_PAGES_MASK);
> +
> +	m->nr_pages = nr_pages & PKVM_MAPPING_NR_PAGES_MASK;
> +	if (cacheable)
> +		m->nr_pages |= PKVM_MAPPING_CACHEABLE;
> +}
> +
>  static u64 __pkvm_mapping_end(struct pkvm_mapping *m)
>  {
> -	return (m->gfn + m->nr_pages) * PAGE_SIZE - 1;
> +	return (m->gfn + pkvm_mapping_nr_pages(m)) * PAGE_SIZE - 1;
>  }
>  
>  INTERVAL_TREE_DEFINE(struct pkvm_mapping, node, u64, __subtree_last,
> @@ -350,7 +373,7 @@ static int __pkvm_pgtable_stage2_reclaim(struct kvm_pgtable *pgt, u64 start, u64
>  			continue;
>  
>  		page = pfn_to_page(mapping->pfn);
> -		WARN_ON_ONCE(mapping->nr_pages != 1);
> +		WARN_ON_ONCE(pkvm_mapping_nr_pages(mapping) != 1);
>  		unpin_user_pages_dirty_lock(&page, 1, true);
>  		account_locked_vm(kvm->mm, 1, false);
>  		pkvm_mapping_remove(mapping, &pgt->pkvm_mappings);
> @@ -369,7 +392,7 @@ static int __pkvm_pgtable_stage2_unshare(struct kvm_pgtable *pgt, u64 start, u64
>  
>  	for_each_mapping_in_range_safe(pgt, start, end, mapping) {
>  		ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_guest, handle, mapping->gfn,
> -					mapping->nr_pages);
> +					pkvm_mapping_nr_pages(mapping));
>  		if (WARN_ON(ret))
>  			return ret;
>  		pkvm_mapping_remove(mapping, &pgt->pkvm_mappings);
> @@ -448,7 +471,7 @@ int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
>  		 * permission faults are handled in the relax_perms() path.
>  		 */
>  		if (mapping) {
> -			if (size == (mapping->nr_pages * PAGE_SIZE))
> +			if (size == (pkvm_mapping_nr_pages(mapping) * PAGE_SIZE))
>  				return -EAGAIN;
>  
>  			/*
> @@ -472,7 +495,9 @@ int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
>  	swap(mapping, cache->mapping);
>  	mapping->gfn = gfn;
>  	mapping->pfn = pfn;
> -	mapping->nr_pages = size / PAGE_SIZE;
> +	pkvm_mapping_set_nr_pages(mapping, size / PAGE_SIZE,
> +				  !(prot & (KVM_PGTABLE_PROT_DEVICE |
> +					    KVM_PGTABLE_PROT_NORMAL_NC)));
>  	pkvm_mapping_insert(mapping, &pgt->pkvm_mappings);
>  
>  	return ret;
> @@ -503,7 +528,7 @@ int pkvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
>  	lockdep_assert_held(&kvm->mmu_lock);
>  	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
>  		ret = kvm_call_hyp_nvhe(__pkvm_host_wrprotect_guest, handle, mapping->gfn,
> -					mapping->nr_pages);
> +					pkvm_mapping_nr_pages(mapping));
>  		if (WARN_ON(ret))
>  			break;
>  	}
> @@ -517,9 +542,13 @@ int pkvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
>  	struct pkvm_mapping *mapping;
>  
>  	lockdep_assert_held(&kvm->mmu_lock);
> -	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
> +	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
> +		if (!pkvm_mapping_is_cacheable(mapping))
> +			continue;
> +
>  		__clean_dcache_guest_page(pfn_to_kaddr(mapping->pfn),
> -					  PAGE_SIZE * mapping->nr_pages);
> +					  PAGE_SIZE * pkvm_mapping_nr_pages(mapping));
> +	}
>  
>  	return 0;
>  }
> @@ -536,8 +565,10 @@ bool pkvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr, u64
>  
>  	lockdep_assert_held(&kvm->mmu_lock);
>  	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
> -		young |= kvm_call_hyp_nvhe(__pkvm_host_test_clear_young_guest, handle, mapping->gfn,
> -					   mapping->nr_pages, mkold);
> +		young |= kvm_call_hyp_nvhe(__pkvm_host_test_clear_young_guest,
> +					   handle, mapping->gfn,
> +					   pkvm_mapping_nr_pages(mapping),
> +					   mkold);
>  
>  	return young;
>  }
> -- 
> 2.53.0
> 


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v3 1/3] KVM: arm64: skip pKVM cache flushes for non cacheable mappings
  2026-07-01 16:05   ` Leonardo Bras
@ 2026-07-01 16:40     ` Bradley Morgan
  2026-07-01 16:53       ` Leonardo Bras
  0 siblings, 1 reply; 16+ messages in thread
From: Bradley Morgan @ 2026-07-01 16:40 UTC (permalink / raw)
  To: Leonardo Bras
  Cc: Marc Zyngier, Oliver Upton, Fuad Tabba, Joey Gouly, Steffen Eiden,
	Suzuki K Poulose, Zenghui Yu, Catalin Marinas, Will Deacon,
	Quentin Perret, Vincent Donnefort, Gavin Shan, Alexandru Elisei,
	linux-arm-kernel, kvmarm, linux-kernel

On July 1, 2026 5:05:53 PM GMT+01:00, Leonardo Bras <leo.bras@arm.com>
wrote:
>On Wed, Jun 24, 2026 at 04:00:26PM +0000, Bradley Morgan wrote:
>> pKVM keeps its own mapping list for stage 2 operations. Its flush path
>> uses that list directly, so it lost the PTE attribute check done by the
>> generic stage 2 walker.
>> 
>> Record whether a mapping is cacheable and skip cache maintenance for
>> mappings that are not cacheable.
>> 
>> Fixes: e912efed485a ("KVM: arm64: Introduce the EL1 pKVM MMU")
>> Signed-off-by: Bradley Morgan <include@grrlz.net>
>> ---
>>  arch/arm64/kvm/pkvm.c | 51 ++++++++++++++++++++++++++++++++++---------
>>  1 file changed, 41 insertions(+), 10 deletions(-)
>> 
>> diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
>> index 428723b1b0f5..ca6e823028c2 100644
>> --- a/arch/arm64/kvm/pkvm.c
>> +++ b/arch/arm64/kvm/pkvm.c
>> @@ -302,9 +302,32 @@ static u64 __pkvm_mapping_start(struct pkvm_mapping
>*m)
>>  	return m->gfn * PAGE_SIZE;
>>  }
>>  
>> +#define PKVM_MAPPING_NR_PAGES_MASK	GENMASK_ULL(47, 0)
>> +#define PKVM_MAPPING_CACHEABLE		BIT_ULL(48)
>
>Out of curiosity here, why do you choose to use bit 48 here instead of, 
>let's say, bit 63?
>
>(I know it makes absolutely no difference to inner working here, as there 
>should probably not be 2^48 pages in one mapping.)
>
>Thanks!
>Leo


Sup Leo, here's a quote from maz:

"This thing is already big enough, let's not add a bool right in the
middle (use pahole to find out why this is bad). Given that nr_pages
is for a range, and that the minimum page size uses 12 bits, the
largest number of pages you can have here is 56-12=48 bit wide. That's
another 16 bits worth of flags you can use."

This should clarify things; if you have any questions, feel more than free to ask!

(btw V4 is coming soon)

>> +
>> +static u64 pkvm_mapping_nr_pages(struct pkvm_mapping *m)
>> +{
>> +	return m->nr_pages & PKVM_MAPPING_NR_PAGES_MASK;
>> +}
>> +
>> +static bool pkvm_mapping_is_cacheable(struct pkvm_mapping *m)
>> +{
>> +	return m->nr_pages & PKVM_MAPPING_CACHEABLE;
>> +}
>> +
>> +static void pkvm_mapping_set_nr_pages(struct pkvm_mapping *m, u64
>nr_pages,
>> +				      bool cacheable)
>> +{
>> +	WARN_ON_ONCE(nr_pages & ~PKVM_MAPPING_NR_PAGES_MASK);
>> +
>> +	m->nr_pages = nr_pages & PKVM_MAPPING_NR_PAGES_MASK;
>> +	if (cacheable)
>> +		m->nr_pages |= PKVM_MAPPING_CACHEABLE;
>> +}
>> +
>>  static u64 __pkvm_mapping_end(struct pkvm_mapping *m)
>>  {
>> -	return (m->gfn + m->nr_pages) * PAGE_SIZE - 1;
>> +	return (m->gfn + pkvm_mapping_nr_pages(m)) * PAGE_SIZE - 1;
>>  }
>>  
>>  INTERVAL_TREE_DEFINE(struct pkvm_mapping, node, u64, __subtree_last,
>> @@ -350,7 +373,7 @@ static int __pkvm_pgtable_stage2_reclaim(struct
>kvm_pgtable *pgt, u64 start, u64
>>  			continue;
>>  
>>  		page = pfn_to_page(mapping->pfn);
>> -		WARN_ON_ONCE(mapping->nr_pages != 1);
>> +		WARN_ON_ONCE(pkvm_mapping_nr_pages(mapping) != 1);
>>  		unpin_user_pages_dirty_lock(&page, 1, true);
>>  		account_locked_vm(kvm->mm, 1, false);
>>  		pkvm_mapping_remove(mapping, &pgt->pkvm_mappings);
>> @@ -369,7 +392,7 @@ static int __pkvm_pgtable_stage2_unshare(struct
>kvm_pgtable *pgt, u64 start, u64
>>  
>>  	for_each_mapping_in_range_safe(pgt, start, end, mapping) {
>>  		ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_guest, handle, mapping->gfn,
>> -					mapping->nr_pages);
>> +					pkvm_mapping_nr_pages(mapping));
>>  		if (WARN_ON(ret))
>>  			return ret;
>>  		pkvm_mapping_remove(mapping, &pgt->pkvm_mappings);
>> @@ -448,7 +471,7 @@ int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt,
>u64 addr, u64 size,
>>  		 * permission faults are handled in the relax_perms() path.
>>  		 */
>>  		if (mapping) {
>> -			if (size == (mapping->nr_pages * PAGE_SIZE))
>> +			if (size == (pkvm_mapping_nr_pages(mapping) * PAGE_SIZE))
>>  				return -EAGAIN;
>>  
>>  			/*
>> @@ -472,7 +495,9 @@ int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt,
>u64 addr, u64 size,
>>  	swap(mapping, cache->mapping);
>>  	mapping->gfn = gfn;
>>  	mapping->pfn = pfn;
>> -	mapping->nr_pages = size / PAGE_SIZE;
>> +	pkvm_mapping_set_nr_pages(mapping, size / PAGE_SIZE,
>> +				  !(prot & (KVM_PGTABLE_PROT_DEVICE |
>> +					    KVM_PGTABLE_PROT_NORMAL_NC)));
>>  	pkvm_mapping_insert(mapping, &pgt->pkvm_mappings);
>>  
>>  	return ret;
>> @@ -503,7 +528,7 @@ int pkvm_pgtable_stage2_wrprotect(struct kvm_pgtable
>*pgt, u64 addr, u64 size)
>>  	lockdep_assert_held(&kvm->mmu_lock);
>>  	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
>>  		ret = kvm_call_hyp_nvhe(__pkvm_host_wrprotect_guest, handle, mapping->gfn,
>> -					mapping->nr_pages);
>> +					pkvm_mapping_nr_pages(mapping));
>>  		if (WARN_ON(ret))
>>  			break;
>>  	}
>> @@ -517,9 +542,13 @@ int pkvm_pgtable_stage2_flush(struct kvm_pgtable
>*pgt, u64 addr, u64 size)
>>  	struct pkvm_mapping *mapping;
>>  
>>  	lockdep_assert_held(&kvm->mmu_lock);
>> -	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
>> +	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
>> +		if (!pkvm_mapping_is_cacheable(mapping))
>> +			continue;
>> +
>>  		__clean_dcache_guest_page(pfn_to_kaddr(mapping->pfn),
>> -					  PAGE_SIZE * mapping->nr_pages);
>> +					  PAGE_SIZE * pkvm_mapping_nr_pages(mapping));
>> +	}
>>  
>>  	return 0;
>>  }
>> @@ -536,8 +565,10 @@ bool pkvm_pgtable_stage2_test_clear_young(struct
>kvm_pgtable *pgt, u64 addr, u64
>>  
>>  	lockdep_assert_held(&kvm->mmu_lock);
>>  	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
>> -		young |= kvm_call_hyp_nvhe(__pkvm_host_test_clear_young_guest, handle, mapping->gfn,
>> -					   mapping->nr_pages, mkold);
>> +		young |= kvm_call_hyp_nvhe(__pkvm_host_test_clear_young_guest,
>> +					   handle, mapping->gfn,
>> +					   pkvm_mapping_nr_pages(mapping),
>> +					   mkold);
>>  
>>  	return young;
>>  }
>> -- 
>> 2.53.0
>> 
>

Thanks!


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v3 1/3] KVM: arm64: skip pKVM cache flushes for non cacheable mappings
  2026-07-01 16:40     ` Bradley Morgan
@ 2026-07-01 16:53       ` Leonardo Bras
  2026-07-01 16:54         ` Bradley Morgan
  0 siblings, 1 reply; 16+ messages in thread
From: Leonardo Bras @ 2026-07-01 16:53 UTC (permalink / raw)
  To: Bradley Morgan
  Cc: Leonardo Bras, Marc Zyngier, Oliver Upton, Fuad Tabba, Joey Gouly,
	Steffen Eiden, Suzuki K Poulose, Zenghui Yu, Catalin Marinas,
	Will Deacon, Quentin Perret, Vincent Donnefort, Gavin Shan,
	Alexandru Elisei, linux-arm-kernel, kvmarm, linux-kernel

On Wed, Jul 01, 2026 at 05:40:46PM +0100, Bradley Morgan wrote:
> On July 1, 2026 5:05:53 PM GMT+01:00, Leonardo Bras <leo.bras@arm.com>
> wrote:
> >On Wed, Jun 24, 2026 at 04:00:26PM +0000, Bradley Morgan wrote:
> >> pKVM keeps its own mapping list for stage 2 operations. Its flush path
> >> uses that list directly, so it lost the PTE attribute check done by the
> >> generic stage 2 walker.
> >> 
> >> Record whether a mapping is cacheable and skip cache maintenance for
> >> mappings that are not cacheable.
> >> 
> >> Fixes: e912efed485a ("KVM: arm64: Introduce the EL1 pKVM MMU")
> >> Signed-off-by: Bradley Morgan <include@grrlz.net>
> >> ---
> >>  arch/arm64/kvm/pkvm.c | 51 ++++++++++++++++++++++++++++++++++---------
> >>  1 file changed, 41 insertions(+), 10 deletions(-)
> >> 
> >> diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
> >> index 428723b1b0f5..ca6e823028c2 100644
> >> --- a/arch/arm64/kvm/pkvm.c
> >> +++ b/arch/arm64/kvm/pkvm.c
> >> @@ -302,9 +302,32 @@ static u64 __pkvm_mapping_start(struct pkvm_mapping
> >*m)
> >>  	return m->gfn * PAGE_SIZE;
> >>  }
> >>  
> >> +#define PKVM_MAPPING_NR_PAGES_MASK	GENMASK_ULL(47, 0)
> >> +#define PKVM_MAPPING_CACHEABLE		BIT_ULL(48)
> >
> >Out of curiosity here, why do you choose to use bit 48 here instead of, 
> >let's say, bit 63?
> >
> >(I know it makes absolutely no difference to inner working here, as there 
> >should probably not be 2^48 pages in one mapping.)
> >
> >Thanks!
> >Leo
> 
> 
> sup Leo, here's a quote from maz 

Hi Bradley,

> 
> "This thing is already big enough, let's not add a bool right in the
> middle (use pahole to find out why this is bad).

I suppose you had proposed adding a bool to the struct, maybe?
That would mess up the struct layout (padding).

> Given that nr_pages
> is for a range, and that the minimum page size uses 12 bits, the
> largest number of pages you can have here is 56-12=48 bit wide. That's
> another 16 bits worth of flags you can use."

Hmm, makes sense.
And since he mentions 16 bits' worth of flags, you started by using bit 48.
OK, I get your rationale.

(I would possibly have started with bit 63, though, but that's more a matter
of personal taste.)

> 
> this should just clarify things, any questions, feel more than free to ask!
> 
> (btw V4 is coming soon)

Thanks!
Leo

> 
> >> +
> >> +static u64 pkvm_mapping_nr_pages(struct pkvm_mapping *m)
> >> +{
> >> +	return m->nr_pages & PKVM_MAPPING_NR_PAGES_MASK;
> >> +}
> >> +
> >> +static bool pkvm_mapping_is_cacheable(struct pkvm_mapping *m)
> >> +{
> >> +	return m->nr_pages & PKVM_MAPPING_CACHEABLE;
> >> +}
> >> +
> >> +static void pkvm_mapping_set_nr_pages(struct pkvm_mapping *m, u64
> >nr_pages,
> >> +				      bool cacheable)
> >> +{
> >> +	WARN_ON_ONCE(nr_pages & ~PKVM_MAPPING_NR_PAGES_MASK);
> >> +
> >> +	m->nr_pages = nr_pages & PKVM_MAPPING_NR_PAGES_MASK;
> >> +	if (cacheable)
> >> +		m->nr_pages |= PKVM_MAPPING_CACHEABLE;
> >> +}
> >> +
> >>  static u64 __pkvm_mapping_end(struct pkvm_mapping *m)
> >>  {
> >> -	return (m->gfn + m->nr_pages) * PAGE_SIZE - 1;
> >> +	return (m->gfn + pkvm_mapping_nr_pages(m)) * PAGE_SIZE - 1;
> >>  }
> >>  
> >>  INTERVAL_TREE_DEFINE(struct pkvm_mapping, node, u64, __subtree_last,
> >> @@ -350,7 +373,7 @@ static int __pkvm_pgtable_stage2_reclaim(struct
> >kvm_pgtable *pgt, u64 start, u64
> >>  			continue;
> >>  
> >>  		page = pfn_to_page(mapping->pfn);
> >> -		WARN_ON_ONCE(mapping->nr_pages != 1);
> >> +		WARN_ON_ONCE(pkvm_mapping_nr_pages(mapping) != 1);
> >>  		unpin_user_pages_dirty_lock(&page, 1, true);
> >>  		account_locked_vm(kvm->mm, 1, false);
> >>  		pkvm_mapping_remove(mapping, &pgt->pkvm_mappings);
> >> @@ -369,7 +392,7 @@ static int __pkvm_pgtable_stage2_unshare(struct
> >kvm_pgtable *pgt, u64 start, u64
> >>  
> >>  	for_each_mapping_in_range_safe(pgt, start, end, mapping) {
> >>  		ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_guest, handle, mapping->gfn,
> >> -					mapping->nr_pages);
> >> +					pkvm_mapping_nr_pages(mapping));
> >>  		if (WARN_ON(ret))
> >>  			return ret;
> >>  		pkvm_mapping_remove(mapping, &pgt->pkvm_mappings);
> >> @@ -448,7 +471,7 @@ int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt,
> >u64 addr, u64 size,
> >>  		 * permission faults are handled in the relax_perms() path.
> >>  		 */
> >>  		if (mapping) {
> >> -			if (size == (mapping->nr_pages * PAGE_SIZE))
> >> +			if (size == (pkvm_mapping_nr_pages(mapping) * PAGE_SIZE))
> >>  				return -EAGAIN;
> >>  
> >>  			/*
> >> @@ -472,7 +495,9 @@ int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt,
> >u64 addr, u64 size,
> >>  	swap(mapping, cache->mapping);
> >>  	mapping->gfn = gfn;
> >>  	mapping->pfn = pfn;
> >> -	mapping->nr_pages = size / PAGE_SIZE;
> >> +	pkvm_mapping_set_nr_pages(mapping, size / PAGE_SIZE,
> >> +				  !(prot & (KVM_PGTABLE_PROT_DEVICE |
> >> +					    KVM_PGTABLE_PROT_NORMAL_NC)));
> >>  	pkvm_mapping_insert(mapping, &pgt->pkvm_mappings);
> >>  
> >>  	return ret;
> >> @@ -503,7 +528,7 @@ int pkvm_pgtable_stage2_wrprotect(struct kvm_pgtable
> >*pgt, u64 addr, u64 size)
> >>  	lockdep_assert_held(&kvm->mmu_lock);
> >>  	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
> >>  		ret = kvm_call_hyp_nvhe(__pkvm_host_wrprotect_guest, handle, mapping->gfn,
> >> -					mapping->nr_pages);
> >> +					pkvm_mapping_nr_pages(mapping));
> >>  		if (WARN_ON(ret))
> >>  			break;
> >>  	}
> >> @@ -517,9 +542,13 @@ int pkvm_pgtable_stage2_flush(struct kvm_pgtable
> >*pgt, u64 addr, u64 size)
> >>  	struct pkvm_mapping *mapping;
> >>  
> >>  	lockdep_assert_held(&kvm->mmu_lock);
> >> -	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
> >> +	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
> >> +		if (!pkvm_mapping_is_cacheable(mapping))
> >> +			continue;
> >> +
> >>  		__clean_dcache_guest_page(pfn_to_kaddr(mapping->pfn),
> >> -					  PAGE_SIZE * mapping->nr_pages);
> >> +					  PAGE_SIZE * pkvm_mapping_nr_pages(mapping));
> >> +	}
> >>  
> >>  	return 0;
> >>  }
> >> @@ -536,8 +565,10 @@ bool pkvm_pgtable_stage2_test_clear_young(struct
> >kvm_pgtable *pgt, u64 addr, u64
> >>  
> >>  	lockdep_assert_held(&kvm->mmu_lock);
> >>  	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
> >> -		young |= kvm_call_hyp_nvhe(__pkvm_host_test_clear_young_guest, handle, mapping->gfn,
> >> -					   mapping->nr_pages, mkold);
> >> +		young |= kvm_call_hyp_nvhe(__pkvm_host_test_clear_young_guest,
> >> +					   handle, mapping->gfn,
> >> +					   pkvm_mapping_nr_pages(mapping),
> >> +					   mkold);
> >>  
> >>  	return young;
> >>  }
> >> -- 
> >> 2.53.0
> >> 
> >
> 
> Thanks!


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v3 1/3] KVM: arm64: skip pKVM cache flushes for non cacheable mappings
  2026-07-01 16:53       ` Leonardo Bras
@ 2026-07-01 16:54         ` Bradley Morgan
  2026-07-01 16:56           ` Leonardo Bras
  0 siblings, 1 reply; 16+ messages in thread
From: Bradley Morgan @ 2026-07-01 16:54 UTC (permalink / raw)
  To: Leonardo Bras
  Cc: Marc Zyngier, Oliver Upton, Fuad Tabba, Joey Gouly, Steffen Eiden,
	Suzuki K Poulose, Zenghui Yu, Catalin Marinas, Will Deacon,
	Quentin Perret, Vincent Donnefort, Gavin Shan, Alexandru Elisei,
	linux-arm-kernel, kvmarm, linux-kernel

On July 1, 2026 5:53:34 PM GMT+01:00, Leonardo Bras <leo.bras@arm.com>
wrote:
>On Wed, Jul 01, 2026 at 05:40:46PM +0100, Bradley Morgan wrote:
>> On July 1, 2026 5:05:53 PM GMT+01:00, Leonardo Bras <leo.bras@arm.com>
>> wrote:
>> >On Wed, Jun 24, 2026 at 04:00:26PM +0000, Bradley Morgan wrote:
>> >> pKVM keeps its own mapping list for stage 2 operations. Its flush
>path
>> >> uses that list directly, so it lost the PTE attribute check done by
>the
>> >> generic stage 2 walker.
>> >> 
>> >> Record whether a mapping is cacheable and skip cache maintenance for
>> >> mappings that are not cacheable.
>> >> 
>> >> Fixes: e912efed485a ("KVM: arm64: Introduce the EL1 pKVM MMU")
>> >> Signed-off-by: Bradley Morgan <include@grrlz.net>
>> >> ---
>> >>  arch/arm64/kvm/pkvm.c | 51
>++++++++++++++++++++++++++++++++++---------
>> >>  1 file changed, 41 insertions(+), 10 deletions(-)
>> >> 
>> >> diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
>> >> index 428723b1b0f5..ca6e823028c2 100644
>> >> --- a/arch/arm64/kvm/pkvm.c
>> >> +++ b/arch/arm64/kvm/pkvm.c
>> >> @@ -302,9 +302,32 @@ static u64 __pkvm_mapping_start(struct
>pkvm_mapping
>> >*m)
>> >>  	return m->gfn * PAGE_SIZE;
>> >>  }
>> >>  
>> >> +#define PKVM_MAPPING_NR_PAGES_MASK	GENMASK_ULL(47, 0)
>> >> +#define PKVM_MAPPING_CACHEABLE		BIT_ULL(48)
>> >
>> >Out of curiosity here, why do you choose to use bit 48 here instead of,
>> >let's say, bit 63?
>> >
>> >(I know it makes absolutely no difference to inner working here, as
>there 
>> >should probably not be 2^48 pages in one mapping.)
>> >
>> >Thanks!
>> >Leo
>> 
>> 
>> sup Leo, here's a quote from maz 
>
>Hi Bradley,
>
>> 
>> "This thing is already big enough, let's not add a bool right in the
>> middle (use pahole to find out why this is bad).
>
>I suppose you proposed to add a bool into a struct, maybe?
>It would screw the struct alignment.

yep, crappy old me


>> Given that nr_pages
>> is for a range, and that the minimum page size uses 12 bits, the
>> largest number of pages you can have here is 56-12=48 bit wide. That's
>> another 16 bits worth of flags you can use."
>
>Humm, makes sense.
>And since he mentions 16 bits worth of flags, you start by using the 48th 
>bit. Ok, got your rationale.
>
>(I would possibly start with the 63, though, but that's more on personal 
>taste)

48 won't make the world blow up :)

>> 
>> this should just clarify things, any questions, feel more than free to
>ask!
>> 
>> (btw V4 is coming soon)
>
>Thanks!
>Leo
>
>> 
>> >> +
>> >> +static u64 pkvm_mapping_nr_pages(struct pkvm_mapping *m)
>> >> +{
>> >> +	return m->nr_pages & PKVM_MAPPING_NR_PAGES_MASK;
>> >> +}
>> >> +
>> >> +static bool pkvm_mapping_is_cacheable(struct pkvm_mapping *m)
>> >> +{
>> >> +	return m->nr_pages & PKVM_MAPPING_CACHEABLE;
>> >> +}
>> >> +
>> >> +static void pkvm_mapping_set_nr_pages(struct pkvm_mapping *m, u64
>> >nr_pages,
>> >> +				      bool cacheable)
>> >> +{
>> >> +	WARN_ON_ONCE(nr_pages & ~PKVM_MAPPING_NR_PAGES_MASK);
>> >> +
>> >> +	m->nr_pages = nr_pages & PKVM_MAPPING_NR_PAGES_MASK;
>> >> +	if (cacheable)
>> >> +		m->nr_pages |= PKVM_MAPPING_CACHEABLE;
>> >> +}
>> >> +
>> >>  static u64 __pkvm_mapping_end(struct pkvm_mapping *m)
>> >>  {
>> >> -	return (m->gfn + m->nr_pages) * PAGE_SIZE - 1;
>> >> +	return (m->gfn + pkvm_mapping_nr_pages(m)) * PAGE_SIZE - 1;
>> >>  }
>> >>  
>> >>  INTERVAL_TREE_DEFINE(struct pkvm_mapping, node, u64, __subtree_last,
>> >> @@ -350,7 +373,7 @@ static int __pkvm_pgtable_stage2_reclaim(struct
>> >kvm_pgtable *pgt, u64 start, u64
>> >>  			continue;
>> >>  
>> >>  		page = pfn_to_page(mapping->pfn);
>> >> -		WARN_ON_ONCE(mapping->nr_pages != 1);
>> >> +		WARN_ON_ONCE(pkvm_mapping_nr_pages(mapping) != 1);
>> >>  		unpin_user_pages_dirty_lock(&page, 1, true);
>> >>  		account_locked_vm(kvm->mm, 1, false);
>> >>  		pkvm_mapping_remove(mapping, &pgt->pkvm_mappings);
>> >> @@ -369,7 +392,7 @@ static int __pkvm_pgtable_stage2_unshare(struct
>> >kvm_pgtable *pgt, u64 start, u64
>> >>  
>> >>  	for_each_mapping_in_range_safe(pgt, start, end, mapping) {
>> >>  		ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_guest, handle, mapping->gfn,
>> >> -					mapping->nr_pages);
>> >> +					pkvm_mapping_nr_pages(mapping));
>> >>  		if (WARN_ON(ret))
>> >>  			return ret;
>> >>  		pkvm_mapping_remove(mapping, &pgt->pkvm_mappings);
>> >> @@ -448,7 +471,7 @@ int pkvm_pgtable_stage2_map(struct kvm_pgtable
>*pgt,
>> >u64 addr, u64 size,
>> >>  		 * permission faults are handled in the relax_perms() path.
>> >>  		 */
>> >>  		if (mapping) {
>> >> -			if (size == (mapping->nr_pages * PAGE_SIZE))
>> >> +			if (size == (pkvm_mapping_nr_pages(mapping) * PAGE_SIZE))
>> >>  				return -EAGAIN;
>> >>  
>> >>  			/*
>> >> @@ -472,7 +495,9 @@ int pkvm_pgtable_stage2_map(struct kvm_pgtable
>*pgt,
>> >u64 addr, u64 size,
>> >>  	swap(mapping, cache->mapping);
>> >>  	mapping->gfn = gfn;
>> >>  	mapping->pfn = pfn;
>> >> -	mapping->nr_pages = size / PAGE_SIZE;
>> >> +	pkvm_mapping_set_nr_pages(mapping, size / PAGE_SIZE,
>> >> +				  !(prot & (KVM_PGTABLE_PROT_DEVICE |
>> >> +					    KVM_PGTABLE_PROT_NORMAL_NC)));
>> >>  	pkvm_mapping_insert(mapping, &pgt->pkvm_mappings);
>> >>  
>> >>  	return ret;
>> >> @@ -503,7 +528,7 @@ int pkvm_pgtable_stage2_wrprotect(struct
>kvm_pgtable
>> >*pgt, u64 addr, u64 size)
>> >>  	lockdep_assert_held(&kvm->mmu_lock);
>> >>  	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
>> >>  		ret = kvm_call_hyp_nvhe(__pkvm_host_wrprotect_guest, handle, mapping->gfn,
>> >> -					mapping->nr_pages);
>> >> +					pkvm_mapping_nr_pages(mapping));
>> >>  		if (WARN_ON(ret))
>> >>  			break;
>> >>  	}
>> >> @@ -517,9 +542,13 @@ int pkvm_pgtable_stage2_flush(struct kvm_pgtable
>> >*pgt, u64 addr, u64 size)
>> >>  	struct pkvm_mapping *mapping;
>> >>  
>> >>  	lockdep_assert_held(&kvm->mmu_lock);
>> >> -	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
>> >> +	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
>> >> +		if (!pkvm_mapping_is_cacheable(mapping))
>> >> +			continue;
>> >> +
>> >>  		__clean_dcache_guest_page(pfn_to_kaddr(mapping->pfn),
>> >> -					  PAGE_SIZE * mapping->nr_pages);
>> >> +					  PAGE_SIZE * pkvm_mapping_nr_pages(mapping));
>> >> +	}
>> >>  
>> >>  	return 0;
>> >>  }
>> >> @@ -536,8 +565,10 @@ bool pkvm_pgtable_stage2_test_clear_young(struct
>> >kvm_pgtable *pgt, u64 addr, u64
>> >>  
>> >>  	lockdep_assert_held(&kvm->mmu_lock);
>> >>  	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
>> >> -		young |= kvm_call_hyp_nvhe(__pkvm_host_test_clear_young_guest, handle, mapping->gfn,
>> >> -					   mapping->nr_pages, mkold);
>> >> +		young |= kvm_call_hyp_nvhe(__pkvm_host_test_clear_young_guest,
>> >> +					   handle, mapping->gfn,
>> >> +					   pkvm_mapping_nr_pages(mapping),
>> >> +					   mkold);
>> >>  
>> >>  	return young;
>> >>  }
>> >> -- 
>> >> 2.53.0
>> >> 
>> >
>> 
>> Thanks!
>

Thanks!


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v3 1/3] KVM: arm64: skip pKVM cache flushes for non cacheable mappings
  2026-07-01 16:54         ` Bradley Morgan
@ 2026-07-01 16:56           ` Leonardo Bras
  2026-07-01 16:57             ` Bradley Morgan
  0 siblings, 1 reply; 16+ messages in thread
From: Leonardo Bras @ 2026-07-01 16:56 UTC (permalink / raw)
  To: Bradley Morgan
  Cc: Leonardo Bras, Marc Zyngier, Oliver Upton, Fuad Tabba, Joey Gouly,
	Steffen Eiden, Suzuki K Poulose, Zenghui Yu, Catalin Marinas,
	Will Deacon, Quentin Perret, Vincent Donnefort, Gavin Shan,
	Alexandru Elisei, linux-arm-kernel, kvmarm, linux-kernel

On Wed, Jul 01, 2026 at 05:54:40PM +0100, Bradley Morgan wrote:
> On July 1, 2026 5:53:34 PM GMT+01:00, Leonardo Bras <leo.bras@arm.com>
> wrote:
> >On Wed, Jul 01, 2026 at 05:40:46PM +0100, Bradley Morgan wrote:
> >> On July 1, 2026 5:05:53 PM GMT+01:00, Leonardo Bras <leo.bras@arm.com>
> >> wrote:
> >> >On Wed, Jun 24, 2026 at 04:00:26PM +0000, Bradley Morgan wrote:
> >> >> pKVM keeps its own mapping list for stage 2 operations. Its flush
> >path
> >> >> uses that list directly, so it lost the PTE attribute check done by
> >the
> >> >> generic stage 2 walker.
> >> >> 
> >> >> Record whether a mapping is cacheable and skip cache maintenance for
> >> >> mappings that are not cacheable.
> >> >> 
> >> >> Fixes: e912efed485a ("KVM: arm64: Introduce the EL1 pKVM MMU")
> >> >> Signed-off-by: Bradley Morgan <include@grrlz.net>
> >> >> ---
> >> >>  arch/arm64/kvm/pkvm.c | 51
> >++++++++++++++++++++++++++++++++++---------
> >> >>  1 file changed, 41 insertions(+), 10 deletions(-)
> >> >> 
> >> >> diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
> >> >> index 428723b1b0f5..ca6e823028c2 100644
> >> >> --- a/arch/arm64/kvm/pkvm.c
> >> >> +++ b/arch/arm64/kvm/pkvm.c
> >> >> @@ -302,9 +302,32 @@ static u64 __pkvm_mapping_start(struct
> >pkvm_mapping
> >> >*m)
> >> >>  	return m->gfn * PAGE_SIZE;
> >> >>  }
> >> >>  
> >> >> +#define PKVM_MAPPING_NR_PAGES_MASK	GENMASK_ULL(47, 0)
> >> >> +#define PKVM_MAPPING_CACHEABLE		BIT_ULL(48)
> >> >
> >> >Out of curiosity here, why do you choose to use bit 48 here instead of,
> >> >let's say, bit 63?
> >> >
> >> >(I know it makes absolutely no difference to inner working here, as
> >there 
> >> >should probably not be 2^48 pages in one mapping.)
> >> >
> >> >Thanks!
> >> >Leo
> >> 
> >> 
> >> sup Leo, here's a quote from maz 
> >
> >Hi Bradley,
> >
> >> 
> >> "This thing is already big enough, let's not add a bool right in the
> >> middle (use pahole to find out why this is bad).
> >
> >I suppose you proposed to add a bool into a struct, maybe?
> >It would screw the struct alignment.
> 
> yep, crappy old me
> 

Hah, you were probably focused on the big picture.

> 
> >> Given that nr_pages
> >> is for a range, and that the minimum page size uses 12 bits, the
> >> largest number of pages you can have here is 56-12=48 bit wide. That's
> >> another 16 bits worth of flags you can use."
> >
> >Humm, makes sense.
> >And since he mentions 16 bits worth of flags, you start by using the 48th 
> >bit. Ok, got your rationale.
> >
> >(I would possibly start with the 63, though, but that's more on personal 
> >taste)
> 
> 48 won't make the world blow up :)

yeap,

> 
> >> 
> >> this should just clarify things, any questions, feel more than free to
> >ask!
> >> 
> >> (btw V4 is coming soon)
> >
> >Thanks!
> >Leo
> >
> >> 
> >> >> +
> >> >> +static u64 pkvm_mapping_nr_pages(struct pkvm_mapping *m)
> >> >> +{
> >> >> +	return m->nr_pages & PKVM_MAPPING_NR_PAGES_MASK;
> >> >> +}
> >> >> +
> >> >> +static bool pkvm_mapping_is_cacheable(struct pkvm_mapping *m)
> >> >> +{
> >> >> +	return m->nr_pages & PKVM_MAPPING_CACHEABLE;
> >> >> +}
> >> >> +
> >> >> +static void pkvm_mapping_set_nr_pages(struct pkvm_mapping *m, u64
> >> >nr_pages,
> >> >> +				      bool cacheable)
> >> >> +{
> >> >> +	WARN_ON_ONCE(nr_pages & ~PKVM_MAPPING_NR_PAGES_MASK);
> >> >> +
> >> >> +	m->nr_pages = nr_pages & PKVM_MAPPING_NR_PAGES_MASK;
> >> >> +	if (cacheable)
> >> >> +		m->nr_pages |= PKVM_MAPPING_CACHEABLE;
> >> >> +}
> >> >> +
> >> >>  static u64 __pkvm_mapping_end(struct pkvm_mapping *m)
> >> >>  {
> >> >> -	return (m->gfn + m->nr_pages) * PAGE_SIZE - 1;
> >> >> +	return (m->gfn + pkvm_mapping_nr_pages(m)) * PAGE_SIZE - 1;
> >> >>  }
> >> >>  
> >> >>  INTERVAL_TREE_DEFINE(struct pkvm_mapping, node, u64, __subtree_last,
> >> >> @@ -350,7 +373,7 @@ static int __pkvm_pgtable_stage2_reclaim(struct
> >> >kvm_pgtable *pgt, u64 start, u64
> >> >>  			continue;
> >> >>  
> >> >>  		page = pfn_to_page(mapping->pfn);
> >> >> -		WARN_ON_ONCE(mapping->nr_pages != 1);
> >> >> +		WARN_ON_ONCE(pkvm_mapping_nr_pages(mapping) != 1);
> >> >>  		unpin_user_pages_dirty_lock(&page, 1, true);
> >> >>  		account_locked_vm(kvm->mm, 1, false);
> >> >>  		pkvm_mapping_remove(mapping, &pgt->pkvm_mappings);
> >> >> @@ -369,7 +392,7 @@ static int __pkvm_pgtable_stage2_unshare(struct
> >> >kvm_pgtable *pgt, u64 start, u64
> >> >>  
> >> >>  	for_each_mapping_in_range_safe(pgt, start, end, mapping) {
> >> >>  		ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_guest, handle, mapping->gfn,
> >> >> -					mapping->nr_pages);
> >> >> +					pkvm_mapping_nr_pages(mapping));
> >> >>  		if (WARN_ON(ret))
> >> >>  			return ret;
> >> >>  		pkvm_mapping_remove(mapping, &pgt->pkvm_mappings);
> >> >> @@ -448,7 +471,7 @@ int pkvm_pgtable_stage2_map(struct kvm_pgtable
> >*pgt,
> >> >u64 addr, u64 size,
> >> >>  		 * permission faults are handled in the relax_perms() path.
> >> >>  		 */
> >> >>  		if (mapping) {
> >> >> -			if (size == (mapping->nr_pages * PAGE_SIZE))
> >> >> +			if (size == (pkvm_mapping_nr_pages(mapping) * PAGE_SIZE))
> >> >>  				return -EAGAIN;
> >> >>  
> >> >>  			/*
> >> >> @@ -472,7 +495,9 @@ int pkvm_pgtable_stage2_map(struct kvm_pgtable
> >*pgt,
> >> >u64 addr, u64 size,
> >> >>  	swap(mapping, cache->mapping);
> >> >>  	mapping->gfn = gfn;
> >> >>  	mapping->pfn = pfn;
> >> >> -	mapping->nr_pages = size / PAGE_SIZE;
> >> >> +	pkvm_mapping_set_nr_pages(mapping, size / PAGE_SIZE,
> >> >> +				  !(prot & (KVM_PGTABLE_PROT_DEVICE |
> >> >> +					    KVM_PGTABLE_PROT_NORMAL_NC)));
> >> >>  	pkvm_mapping_insert(mapping, &pgt->pkvm_mappings);
> >> >>  
> >> >>  	return ret;
> >> >> @@ -503,7 +528,7 @@ int pkvm_pgtable_stage2_wrprotect(struct
> >kvm_pgtable
> >> >*pgt, u64 addr, u64 size)
> >> >>  	lockdep_assert_held(&kvm->mmu_lock);
> >> >>  	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
> >> >>  		ret = kvm_call_hyp_nvhe(__pkvm_host_wrprotect_guest, handle, mapping->gfn,
> >> >> -					mapping->nr_pages);
> >> >> +					pkvm_mapping_nr_pages(mapping));
> >> >>  		if (WARN_ON(ret))
> >> >>  			break;
> >> >>  	}
> >> >> @@ -517,9 +542,13 @@ int pkvm_pgtable_stage2_flush(struct kvm_pgtable
> >> >*pgt, u64 addr, u64 size)
> >> >>  	struct pkvm_mapping *mapping;
> >> >>  
> >> >>  	lockdep_assert_held(&kvm->mmu_lock);
> >> >> -	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
> >> >> +	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
> >> >> +		if (!pkvm_mapping_is_cacheable(mapping))
> >> >> +			continue;
> >> >> +
> >> >>  		__clean_dcache_guest_page(pfn_to_kaddr(mapping->pfn),
> >> >> -					  PAGE_SIZE * mapping->nr_pages);
> >> >> +					  PAGE_SIZE * pkvm_mapping_nr_pages(mapping));
> >> >> +	}
> >> >>  
> >> >>  	return 0;
> >> >>  }
> >> >> @@ -536,8 +565,10 @@ bool pkvm_pgtable_stage2_test_clear_young(struct
> >> >kvm_pgtable *pgt, u64 addr, u64
> >> >>  
> >> >>  	lockdep_assert_held(&kvm->mmu_lock);
> >> >>  	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
> >> >> -		young |= kvm_call_hyp_nvhe(__pkvm_host_test_clear_young_guest, handle, mapping->gfn,
> >> >> -					   mapping->nr_pages, mkold);
> >> >> +		young |= kvm_call_hyp_nvhe(__pkvm_host_test_clear_young_guest,
> >> >> +					   handle, mapping->gfn,
> >> >> +					   pkvm_mapping_nr_pages(mapping),
> >> >> +					   mkold);
> >> >>  
> >> >>  	return young;
> >> >>  }
> >> >> -- 
> >> >> 2.53.0
> >> >> 
> >> >
> >> 
> >> Thanks!
> >
> 
> Thanks!

Thanks!


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v3 1/3] KVM: arm64: skip pKVM cache flushes for non cacheable mappings
  2026-07-01 16:56           ` Leonardo Bras
@ 2026-07-01 16:57             ` Bradley Morgan
  2026-07-02 10:43               ` Leonardo Bras
  0 siblings, 1 reply; 16+ messages in thread
From: Bradley Morgan @ 2026-07-01 16:57 UTC (permalink / raw)
  To: Leonardo Bras
  Cc: Marc Zyngier, Oliver Upton, Fuad Tabba, Joey Gouly, Steffen Eiden,
	Suzuki K Poulose, Zenghui Yu, Catalin Marinas, Will Deacon,
	Quentin Perret, Vincent Donnefort, Gavin Shan, Alexandru Elisei,
	linux-arm-kernel, kvmarm, linux-kernel

On July 1, 2026 5:56:37 PM GMT+01:00, Leonardo Bras <leo.bras@arm.com> wrote:
>On Wed, Jul 01, 2026 at 05:54:40PM +0100, Bradley Morgan wrote:
>> On July 1, 2026 5:53:34 PM GMT+01:00, Leonardo Bras <leo.bras@arm.com> wrote:
>> >On Wed, Jul 01, 2026 at 05:40:46PM +0100, Bradley Morgan wrote:
>> >> On July 1, 2026 5:05:53 PM GMT+01:00, Leonardo Bras <leo.bras@arm.com> wrote:
>> >> >On Wed, Jun 24, 2026 at 04:00:26PM +0000, Bradley Morgan wrote:
>> >> >> pKVM keeps its own mapping list for stage 2 operations. Its flush path
>> >> >> uses that list directly, so it lost the PTE attribute check done by the
>> >> >> generic stage 2 walker.
>> >> >> 
>> >> >> Record whether a mapping is cacheable and skip cache maintenance for
>> >> >> mappings that are not cacheable.
>> >> >> 
>> >> >> Fixes: e912efed485a ("KVM: arm64: Introduce the EL1 pKVM MMU")
>> >> >> Signed-off-by: Bradley Morgan <include@grrlz.net>
>> >> >> ---
>> >> >>  arch/arm64/kvm/pkvm.c | 51
>> >++++++++++++++++++++++++++++++++++---------
>> >> >>  1 file changed, 41 insertions(+), 10 deletions(-)
>> >> >> 
>> >> >> diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
>> >> >> index 428723b1b0f5..ca6e823028c2 100644
>> >> >> --- a/arch/arm64/kvm/pkvm.c
>> >> >> +++ b/arch/arm64/kvm/pkvm.c
>> >> >> @@ -302,9 +302,32 @@ static u64 __pkvm_mapping_start(struct
>> >pkvm_mapping
>> >> >*m)
>> >> >>  	return m->gfn * PAGE_SIZE;
>> >> >>  }
>> >> >>  
>> >> >> +#define PKVM_MAPPING_NR_PAGES_MASK	GENMASK_ULL(47, 0)
>> >> >> +#define PKVM_MAPPING_CACHEABLE		BIT_ULL(48)
>> >> >
>> >> >Out of curiosity here, why do you choose to use bit 48 here instead of,
>> >> >let's say, bit 63?
>> >> >
>> >> >(I know it makes absolutely no difference to inner working here, as there
>> >> >should probably not be 2^48 pages in one mapping.)
>> >> >
>> >> >Thanks!
>> >> >Leo
>> >> 
>> >> 
>> >> sup Leo, here's a quote from maz 
>> >
>> >Hi Bradley,
>> >
>> >> 
>> >> "This thing is already big enough, let's not add a bool right in the
>> >> middle (use pahole to find out why this is bad).
>> >
>> >I suppose you proposed to add a bool into a struct, maybe?
>> >It would screw the struct alignment.
>> 
>> yep, crappy old me
>> 
>
>Hah, you were probably focused on the big picture.
>
>> 
>> >> Given that nr_pages
>> >> is for a range, and that the minimum page size uses 12 bits, the
>> >> largest number of pages you can have here is 56-12=48 bit wide. That's
>> >> another 16 bits worth of flags you can use."
>> >
>> >Humm, makes sense.
>> >And since he mentions 16 bits worth of flags, you start by using the 48th
>> >bit. Ok, got your rationale.
>> >
>> >(I would possibly start with the 63, though, but that's more on personal
>> >taste)
>> 
>> 48 won't make the world blow up :)
>
>yeap,


Would you like to be CCed on v4 or nahhhh?

>> 
>> >> 
>> >> this should just clarify things, any questions, feel more than free to ask!
>> >> 
>> >> (btw V4 is coming soon)
>> >
>> >Thanks!
>> >Leo
>> >
>> >> 
>> >> >> +
>> >> >> +static u64 pkvm_mapping_nr_pages(struct pkvm_mapping *m)
>> >> >> +{
>> >> >> +	return m->nr_pages & PKVM_MAPPING_NR_PAGES_MASK;
>> >> >> +}
>> >> >> +
>> >> >> +static bool pkvm_mapping_is_cacheable(struct pkvm_mapping *m)
>> >> >> +{
>> >> >> +	return m->nr_pages & PKVM_MAPPING_CACHEABLE;
>> >> >> +}
>> >> >> +
>> >> >> +static void pkvm_mapping_set_nr_pages(struct pkvm_mapping *m, u64
>> >> >nr_pages,
>> >> >> +				      bool cacheable)
>> >> >> +{
>> >> >> +	WARN_ON_ONCE(nr_pages & ~PKVM_MAPPING_NR_PAGES_MASK);
>> >> >> +
>> >> >> +	m->nr_pages = nr_pages & PKVM_MAPPING_NR_PAGES_MASK;
>> >> >> +	if (cacheable)
>> >> >> +		m->nr_pages |= PKVM_MAPPING_CACHEABLE;
>> >> >> +}
>> >> >> +
>> >> >>  static u64 __pkvm_mapping_end(struct pkvm_mapping *m)
>> >> >>  {
>> >> >> -	return (m->gfn + m->nr_pages) * PAGE_SIZE - 1;
>> >> >> +	return (m->gfn + pkvm_mapping_nr_pages(m)) * PAGE_SIZE - 1;
>> >> >>  }
>> >> >>  
>> >> >>  INTERVAL_TREE_DEFINE(struct pkvm_mapping, node, u64,
>__subtree_last,
>> >> >> @@ -350,7 +373,7 @@ static int
>__pkvm_pgtable_stage2_reclaim(struct
>> >> >kvm_pgtable *pgt, u64 start, u64
>> >> >>  			continue;
>> >> >>  
>> >> >>  		page = pfn_to_page(mapping->pfn);
>> >> >> -		WARN_ON_ONCE(mapping->nr_pages != 1);
>> >> >> +		WARN_ON_ONCE(pkvm_mapping_nr_pages(mapping) != 1);
>> >> >>  		unpin_user_pages_dirty_lock(&page, 1, true);
>> >> >>  		account_locked_vm(kvm->mm, 1, false);
>> >> >>  		pkvm_mapping_remove(mapping, &pgt->pkvm_mappings);
>> >> >> @@ -369,7 +392,7 @@ static int
>__pkvm_pgtable_stage2_unshare(struct
>> >> >kvm_pgtable *pgt, u64 start, u64
>> >> >>  
>> >> >>  	for_each_mapping_in_range_safe(pgt, start, end, mapping) {
>> >> >>  		ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_guest, handle, mapping->gfn,
>> >> >> -					mapping->nr_pages);
>> >> >> +					pkvm_mapping_nr_pages(mapping));
>> >> >>  		if (WARN_ON(ret))
>> >> >>  			return ret;
>> >> >>  		pkvm_mapping_remove(mapping, &pgt->pkvm_mappings);
>> >> >> @@ -448,7 +471,7 @@ int pkvm_pgtable_stage2_map(struct kvm_pgtable
>> >*pgt,
>> >> >u64 addr, u64 size,
>> >> >>  		 * permission faults are handled in the relax_perms() path.
>> >> >>  		 */
>> >> >>  		if (mapping) {
>> >> >> -			if (size == (mapping->nr_pages * PAGE_SIZE))
>> >> >> +			if (size == (pkvm_mapping_nr_pages(mapping) * PAGE_SIZE))
>> >> >>  				return -EAGAIN;
>> >> >>  
>> >> >>  			/*
>> >> >> @@ -472,7 +495,9 @@ int pkvm_pgtable_stage2_map(struct kvm_pgtable
>> >*pgt,
>> >> >u64 addr, u64 size,
>> >> >>  	swap(mapping, cache->mapping);
>> >> >>  	mapping->gfn = gfn;
>> >> >>  	mapping->pfn = pfn;
>> >> >> -	mapping->nr_pages = size / PAGE_SIZE;
>> >> >> +	pkvm_mapping_set_nr_pages(mapping, size / PAGE_SIZE,
>> >> >> +				  !(prot & (KVM_PGTABLE_PROT_DEVICE |
>> >> >> +					    KVM_PGTABLE_PROT_NORMAL_NC)));
>> >> >>  	pkvm_mapping_insert(mapping, &pgt->pkvm_mappings);
>> >> >>  
>> >> >>  	return ret;
>> >> >> @@ -503,7 +528,7 @@ int pkvm_pgtable_stage2_wrprotect(struct
>> >kvm_pgtable
>> >> >*pgt, u64 addr, u64 size)
>> >> >>  	lockdep_assert_held(&kvm->mmu_lock);
>> >> >>  	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
>> >> >>  		ret = kvm_call_hyp_nvhe(__pkvm_host_wrprotect_guest, handle, mapping->gfn,
>> >> >> -					mapping->nr_pages);
>> >> >> +					pkvm_mapping_nr_pages(mapping));
>> >> >>  		if (WARN_ON(ret))
>> >> >>  			break;
>> >> >>  	}
>> >> >> @@ -517,9 +542,13 @@ int pkvm_pgtable_stage2_flush(struct
>kvm_pgtable
>> >> >*pgt, u64 addr, u64 size)
>> >> >>  	struct pkvm_mapping *mapping;
>> >> >>  
>> >> >>  	lockdep_assert_held(&kvm->mmu_lock);
>> >> >> -	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
>> >> >> +	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
>> >> >> +		if (!pkvm_mapping_is_cacheable(mapping))
>> >> >> +			continue;
>> >> >> +
>> >> >>  		__clean_dcache_guest_page(pfn_to_kaddr(mapping->pfn),
>> >> >> -					  PAGE_SIZE * mapping->nr_pages);
>> >> >> +					  PAGE_SIZE * pkvm_mapping_nr_pages(mapping));
>> >> >> +	}
>> >> >>  
>> >> >>  	return 0;
>> >> >>  }
>> >> >> @@ -536,8 +565,10 @@ bool
>pkvm_pgtable_stage2_test_clear_young(struct
>> >> >kvm_pgtable *pgt, u64 addr, u64
>> >> >>  
>> >> >>  	lockdep_assert_held(&kvm->mmu_lock);
>> >> >>  	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
>> >> >> -		young |= kvm_call_hyp_nvhe(__pkvm_host_test_clear_young_guest, handle, mapping->gfn,
>> >> >> -					   mapping->nr_pages, mkold);
>> >> >> +		young |= kvm_call_hyp_nvhe(__pkvm_host_test_clear_young_guest,
>> >> >> +					   handle, mapping->gfn,
>> >> >> +					   pkvm_mapping_nr_pages(mapping),
>> >> >> +					   mkold);
>> >> >>  
>> >> >>  	return young;
>> >> >>  }
>> >> >> -- 
>> >> >> 2.53.0
>> >> >> 
>> >> >
>> >> 
>> >> Thanks!
>> >
>> 
>> Thanks!
>
>Thanks!
>

Thanks!


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v3 1/3] KVM: arm64: skip pKVM cache flushes for non cacheable mappings
  2026-07-01 16:57             ` Bradley Morgan
@ 2026-07-02 10:43               ` Leonardo Bras
  0 siblings, 0 replies; 16+ messages in thread
From: Leonardo Bras @ 2026-07-02 10:43 UTC (permalink / raw)
  To: Bradley Morgan
  Cc: Leonardo Bras, Marc Zyngier, Oliver Upton, Fuad Tabba, Joey Gouly,
	Steffen Eiden, Suzuki K Poulose, Zenghui Yu, Catalin Marinas,
	Will Deacon, Quentin Perret, Vincent Donnefort, Gavin Shan,
	Alexandru Elisei, linux-arm-kernel, kvmarm, linux-kernel

On Wed, Jul 01, 2026 at 05:57:48PM +0100, Bradley Morgan wrote:
> On July 1, 2026 5:56:37 PM GMT+01:00, Leonardo Bras <leo.bras@arm.com> wrote:
> >On Wed, Jul 01, 2026 at 05:54:40PM +0100, Bradley Morgan wrote:
> >> On July 1, 2026 5:53:34 PM GMT+01:00, Leonardo Bras <leo.bras@arm.com> wrote:
> >> >On Wed, Jul 01, 2026 at 05:40:46PM +0100, Bradley Morgan wrote:
> >> >> On July 1, 2026 5:05:53 PM GMT+01:00, Leonardo Bras <leo.bras@arm.com> wrote:
> >> >> >On Wed, Jun 24, 2026 at 04:00:26PM +0000, Bradley Morgan wrote:
> >> >> >> pKVM keeps its own mapping list for stage 2 operations. Its flush path
> >> >> >> uses that list directly, so it lost the PTE attribute check done by the
> >> >> >> generic stage 2 walker.
> >> >> >> 
> >> >> >> Record whether a mapping is cacheable and skip cache maintenance for
> >> >> >> mappings that are not cacheable.
> >> >> >> 
> >> >> >> Fixes: e912efed485a ("KVM: arm64: Introduce the EL1 pKVM MMU")
> >> >> >> Signed-off-by: Bradley Morgan <include@grrlz.net>
> >> >> >> ---
> >> >> >>  arch/arm64/kvm/pkvm.c | 51
> >> >++++++++++++++++++++++++++++++++++---------
> >> >> >>  1 file changed, 41 insertions(+), 10 deletions(-)
> >> >> >> 
> >> >> >> diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
> >> >> >> index 428723b1b0f5..ca6e823028c2 100644
> >> >> >> --- a/arch/arm64/kvm/pkvm.c
> >> >> >> +++ b/arch/arm64/kvm/pkvm.c
> >> >> >> @@ -302,9 +302,32 @@ static u64 __pkvm_mapping_start(struct
> >> >pkvm_mapping
> >> >> >*m)
> >> >> >>  	return m->gfn * PAGE_SIZE;
> >> >> >>  }
> >> >> >>  
> >> >> >> +#define PKVM_MAPPING_NR_PAGES_MASK	GENMASK_ULL(47, 0)
> >> >> >> +#define PKVM_MAPPING_CACHEABLE		BIT_ULL(48)
> >> >> >
> >> >> >Out of curiosity here, why do you choose to use bit 48 here instead
> >of,
> >> >> >let's say, bit 63?
> >> >> >
> >> >> >(I know it makes absolutely no difference to inner working here, as
> >> >there 
> >> >> >should probably not be 2^48 pages in one mapping.)
> >> >> >
> >> >> >Thanks!
> >> >> >Leo
> >> >> 
> >> >> 
> >> >> sup Leo, here's a quote from maz 
> >> >
> >> >Hi Bradley,
> >> >
> >> >> 
> >> >> "This thing is already big enough, let's not add a bool right in the
> >> >> middle (use pahole to find out why this is bad).
> >> >
> >> >I suppose you proposed to add a bool into a struct, maybe?
> >> >It would screw the struct alignment.
> >> 
> >> yep, crappy old me
> >> 
> >
> >Hah, you were probably focused on the big picture.
> >
> >> 
> >> >> Given that nr_pages
> >> >> is for a range, and that the minimum page size uses 12 bits, the
> >> >> largest number of pages you can have here is 56-12=48 bit wide.
> >That's
> >> >> another 16 bits worth of flags you can use."
> >> >
> >> >Humm, makes sense.
> >> >And since he mentions 16 bits worth of flags, you start by using the
> >48th 
> >> >bit. Ok, got your rationale.
> >> >
> >> >(I would possibly start with the 63, though, but that's more on
> >personal 
> >> >taste)
> >> 
> >> 48 won't make the world blow up :)
> >
> >yeap,
> 
> 
> Would you like to be CCed on v4 or nahhhh?

Whatever you feel like :)

Thanks!
Leo

> 
> >> 
> >> >> 
> >> >> this should just clarify things, any questions, feel more than free
> >to
> >> >ask!
> >> >> 
> >> >> (btw V4 is coming soon)
> >> >
> >> >Thanks!
> >> >Leo
> >> >
> >> >> 
> >> >> >> +
> >> >> >> +static u64 pkvm_mapping_nr_pages(struct pkvm_mapping *m)
> >> >> >> +{
> >> >> >> +	return m->nr_pages & PKVM_MAPPING_NR_PAGES_MASK;
> >> >> >> +}
> >> >> >> +
> >> >> >> +static bool pkvm_mapping_is_cacheable(struct pkvm_mapping *m)
> >> >> >> +{
> >> >> >> +	return m->nr_pages & PKVM_MAPPING_CACHEABLE;
> >> >> >> +}
> >> >> >> +
> >> >> >> +static void pkvm_mapping_set_nr_pages(struct pkvm_mapping *m, u64
> >> >> >nr_pages,
> >> >> >> +				      bool cacheable)
> >> >> >> +{
> >> >> >> +	WARN_ON_ONCE(nr_pages & ~PKVM_MAPPING_NR_PAGES_MASK);
> >> >> >> +
> >> >> >> +	m->nr_pages = nr_pages & PKVM_MAPPING_NR_PAGES_MASK;
> >> >> >> +	if (cacheable)
> >> >> >> +		m->nr_pages |= PKVM_MAPPING_CACHEABLE;
> >> >> >> +}
> >> >> >> +
> >> >> >>  static u64 __pkvm_mapping_end(struct pkvm_mapping *m)
> >> >> >>  {
> >> >> >> -	return (m->gfn + m->nr_pages) * PAGE_SIZE - 1;
> >> >> >> +	return (m->gfn + pkvm_mapping_nr_pages(m)) * PAGE_SIZE - 1;
> >> >> >>  }
> >> >> >>  
> >> >> >>  INTERVAL_TREE_DEFINE(struct pkvm_mapping, node, u64,
> >__subtree_last,
> >> >> >> @@ -350,7 +373,7 @@ static int
> >__pkvm_pgtable_stage2_reclaim(struct
> >> >> >kvm_pgtable *pgt, u64 start, u64
> >> >> >>  			continue;
> >> >> >>  
> >> >> >>  		page = pfn_to_page(mapping->pfn);
> >> >> >> -		WARN_ON_ONCE(mapping->nr_pages != 1);
> >> >> >> +		WARN_ON_ONCE(pkvm_mapping_nr_pages(mapping) != 1);
> >> >> >>  		unpin_user_pages_dirty_lock(&page, 1, true);
> >> >> >>  		account_locked_vm(kvm->mm, 1, false);
> >> >> >>  		pkvm_mapping_remove(mapping, &pgt->pkvm_mappings);
> >> >> >> @@ -369,7 +392,7 @@ static int
> >__pkvm_pgtable_stage2_unshare(struct
> >> >> >kvm_pgtable *pgt, u64 start, u64
> >> >> >>  
> >> >> >>  	for_each_mapping_in_range_safe(pgt, start, end, mapping) {
> >> >> >>  		ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_guest, handle, mapping->gfn,
> >> >> >> -					mapping->nr_pages);
> >> >> >> +					pkvm_mapping_nr_pages(mapping));
> >> >> >>  		if (WARN_ON(ret))
> >> >> >>  			return ret;
> >> >> >>  		pkvm_mapping_remove(mapping, &pgt->pkvm_mappings);
> >> >> >> @@ -448,7 +471,7 @@ int pkvm_pgtable_stage2_map(struct kvm_pgtable
> >> >*pgt,
> >> >> >u64 addr, u64 size,
> >> >> >>  		 * permission faults are handled in the relax_perms() path.
> >> >> >>  		 */
> >> >> >>  		if (mapping) {
> >> >> >> -			if (size == (mapping->nr_pages * PAGE_SIZE))
> >> >> >> +			if (size == (pkvm_mapping_nr_pages(mapping) * PAGE_SIZE))
> >> >> >>  				return -EAGAIN;
> >> >> >>  
> >> >> >>  			/*
> >> >> >> @@ -472,7 +495,9 @@ int pkvm_pgtable_stage2_map(struct kvm_pgtable
> >> >*pgt,
> >> >> >u64 addr, u64 size,
> >> >> >>  	swap(mapping, cache->mapping);
> >> >> >>  	mapping->gfn = gfn;
> >> >> >>  	mapping->pfn = pfn;
> >> >> >> -	mapping->nr_pages = size / PAGE_SIZE;
> >> >> >> +	pkvm_mapping_set_nr_pages(mapping, size / PAGE_SIZE,
> >> >> >> +				  !(prot & (KVM_PGTABLE_PROT_DEVICE |
> >> >> >> +					    KVM_PGTABLE_PROT_NORMAL_NC)));
> >> >> >>  	pkvm_mapping_insert(mapping, &pgt->pkvm_mappings);
> >> >> >>  
> >> >> >>  	return ret;
> >> >> >> @@ -503,7 +528,7 @@ int pkvm_pgtable_stage2_wrprotect(struct
> >> >kvm_pgtable
> >> >> >*pgt, u64 addr, u64 size)
> >> >> >>  	lockdep_assert_held(&kvm->mmu_lock);
> >> >> >>  	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
> >> >> >>  		ret = kvm_call_hyp_nvhe(__pkvm_host_wrprotect_guest, handle, mapping->gfn,
> >> >> >> -					mapping->nr_pages);
> >> >> >> +					pkvm_mapping_nr_pages(mapping));
> >> >> >>  		if (WARN_ON(ret))
> >> >> >>  			break;
> >> >> >>  	}
> >> >> >> @@ -517,9 +542,13 @@ int pkvm_pgtable_stage2_flush(struct
> >kvm_pgtable
> >> >> >*pgt, u64 addr, u64 size)
> >> >> >>  	struct pkvm_mapping *mapping;
> >> >> >>  
> >> >> >>  	lockdep_assert_held(&kvm->mmu_lock);
> >> >> >> -	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
> >> >> >> +	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
> >> >> >> +		if (!pkvm_mapping_is_cacheable(mapping))
> >> >> >> +			continue;
> >> >> >> +
> >> >> >>  		__clean_dcache_guest_page(pfn_to_kaddr(mapping->pfn),
> >> >> >> -					  PAGE_SIZE * mapping->nr_pages);
> >> >> >> +					  PAGE_SIZE * pkvm_mapping_nr_pages(mapping));
> >> >> >> +	}
> >> >> >>  
> >> >> >>  	return 0;
> >> >> >>  }
> >> >> >> @@ -536,8 +565,10 @@ bool
> >pkvm_pgtable_stage2_test_clear_young(struct
> >> >> >kvm_pgtable *pgt, u64 addr, u64
> >> >> >>  
> >> >> >>  	lockdep_assert_held(&kvm->mmu_lock);
> >> >> >>  	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
> >> >> >> -		young |= kvm_call_hyp_nvhe(__pkvm_host_test_clear_young_guest, handle, mapping->gfn,
> >> >> >> -					   mapping->nr_pages, mkold);
> >> >> >> +		young |= kvm_call_hyp_nvhe(__pkvm_host_test_clear_young_guest,
> >> >> >> +					   handle, mapping->gfn,
> >> >> >> +					   pkvm_mapping_nr_pages(mapping),
> >> >> >> +					   mkold);
> >> >> >>  
> >> >> >>  	return young;
> >> >> >>  }
> >> >> >> -- 
> >> >> >> 2.53.0
> >> >> >> 
> >> >> >
> >> >> 
> >> >> Thanks!
> >> >
> >> 
> >> Thanks!
> >
> >Thanks!
> >
> 
> Thanks!


^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2026-07-02 10:43 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz; follow: Atom feed)
-- links below jump to the message on this page --
2026-06-24 16:00 [PATCH v3 0/3] KVM: arm64: fix pKVM mapping cache corner cases Bradley Morgan
2026-06-24 16:00 ` [PATCH v3 1/3] KVM: arm64: skip pKVM cache flushes for non cacheable mappings Bradley Morgan
2026-07-01 13:31   ` Vincent Donnefort
2026-07-01 16:05   ` Leonardo Bras
2026-07-01 16:40     ` Bradley Morgan
2026-07-01 16:53       ` Leonardo Bras
2026-07-01 16:54         ` Bradley Morgan
2026-07-01 16:56           ` Leonardo Bras
2026-07-01 16:57             ` Bradley Morgan
2026-07-02 10:43               ` Leonardo Bras
2026-06-24 16:00 ` [PATCH v3 2/3] KVM: arm64: top up pKVM mapping cache for permission faults Bradley Morgan
2026-06-24 16:00 ` [PATCH v3 3/3] KVM: arm64: top up stage 2 memcache for dirty logging faults Bradley Morgan
2026-06-24 17:39   ` Bradley Morgan
2026-06-24 17:46     ` Bradley Morgan
2026-06-24 18:25       ` Marc Zyngier
2026-06-24 18:37         ` Bradley Morgan

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.