linux-arm-kernel.lists.infradead.org archive mirror
 help / color / mirror / Atom feed
* [RFC] KVM: arm64: support enabling dirty log graually in small chunks
@ 2020-03-09  8:57 Keqian Zhu
  2020-03-09 11:45 ` Marc Zyngier
  0 siblings, 1 reply; 7+ messages in thread
From: Keqian Zhu @ 2020-03-09  8:57 UTC (permalink / raw)
  To: kvmarm, kvm, linux-kernel, linux-arm-kernel
  Cc: Jay Zhou, Sean Christopherson, Peter Xu, Paolo Bonzini,
	wanghaibin.wang, Keqian Zhu

There is already support of enabling dirty log graually
in small chunks for x86. This adds support for arm64.

Under the Huawei Kunpeng 920 2.6GHz platform, I did some
tests with a 128G linux VM and counted the time taken of
memory_global_dirty_log_start, here is the numbers:

VM Size        Before    After optimization
128G           527ms     4ms

Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
---
Cc: Jay Zhou <jianjay.zhou@huawei.com>
Cc: Paolo Bonzini <pbonzini@redhat.com> 
Cc: Peter Xu <peterx@redhat.com>
Cc: Sean Christopherson <sean.j.christopherson@intel.com>
---
 Documentation/virt/kvm/api.rst    |  2 +-
 arch/arm64/include/asm/kvm_host.h |  4 ++++
 virt/kvm/arm/mmu.c                | 30 ++++++++++++++++++++++--------
 3 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 0adef66585b1..89d4f2680af1 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -5735,7 +5735,7 @@ will be initialized to 1 when created.  This also improves performance because
 dirty logging can be enabled gradually in small chunks on the first call
 to KVM_CLEAR_DIRTY_LOG.  KVM_DIRTY_LOG_INITIALLY_SET depends on
 KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (it is also only available on
-x86 for now).
+x86 and arm64 for now).
 
 KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 was previously available under the name
 KVM_CAP_MANUAL_DIRTY_LOG_PROTECT, but the implementation had bugs that make
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index d87aa609d2b6..0deb2ac7d091 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -16,6 +16,7 @@
 #include <linux/jump_label.h>
 #include <linux/kvm_types.h>
 #include <linux/percpu.h>
+#include <linux/kvm.h>
 #include <asm/arch_gicv3.h>
 #include <asm/barrier.h>
 #include <asm/cpufeature.h>
@@ -45,6 +46,9 @@
 #define KVM_REQ_VCPU_RESET	KVM_ARCH_REQ(2)
 #define KVM_REQ_RECORD_STEAL	KVM_ARCH_REQ(3)
 
+#define KVM_DIRTY_LOG_MANUAL_CAPS   (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
+					KVM_DIRTY_LOG_INITIALLY_SET)
+
 DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
 
 extern unsigned int kvm_sve_max_vl;
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index e3b9ee268823..5c7ca84dec85 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -1438,9 +1438,11 @@ static void stage2_wp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end)
  * @pud:	pointer to pud entry
  * @addr:	range start address
  * @end:	range end address
+ * @wp_ptes:	write protect ptes or not
  */
 static void stage2_wp_pmds(struct kvm *kvm, pud_t *pud,
-			   phys_addr_t addr, phys_addr_t end)
+			   phys_addr_t addr, phys_addr_t end,
+			   bool wp_ptes)
 {
 	pmd_t *pmd;
 	phys_addr_t next;
@@ -1453,7 +1455,7 @@ static void stage2_wp_pmds(struct kvm *kvm, pud_t *pud,
 			if (pmd_thp_or_huge(*pmd)) {
 				if (!kvm_s2pmd_readonly(pmd))
 					kvm_set_s2pmd_readonly(pmd);
-			} else {
+			} else if (wp_ptes) {
 				stage2_wp_ptes(pmd, addr, next);
 			}
 		}
@@ -1465,9 +1467,11 @@ static void stage2_wp_pmds(struct kvm *kvm, pud_t *pud,
  * @pgd:	pointer to pgd entry
  * @addr:	range start address
  * @end:	range end address
+ * @wp_ptes:	write protect ptes or not
  */
 static void  stage2_wp_puds(struct kvm *kvm, pgd_t *pgd,
-			    phys_addr_t addr, phys_addr_t end)
+			    phys_addr_t addr, phys_addr_t end,
+			    bool wp_ptes)
 {
 	pud_t *pud;
 	phys_addr_t next;
@@ -1480,7 +1484,7 @@ static void  stage2_wp_puds(struct kvm *kvm, pgd_t *pgd,
 				if (!kvm_s2pud_readonly(pud))
 					kvm_set_s2pud_readonly(pud);
 			} else {
-				stage2_wp_pmds(kvm, pud, addr, next);
+				stage2_wp_pmds(kvm, pud, addr, next, wp_ptes);
 			}
 		}
 	} while (pud++, addr = next, addr != end);
@@ -1491,8 +1495,10 @@ static void  stage2_wp_puds(struct kvm *kvm, pgd_t *pgd,
  * @kvm:	The KVM pointer
  * @addr:	Start address of range
  * @end:	End address of range
+ * @wp_ptes:	Write protect ptes or not
  */
-static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
+static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr,
+			    phys_addr_t end, bool wp_ptes)
 {
 	pgd_t *pgd;
 	phys_addr_t next;
@@ -1513,7 +1519,7 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
 			break;
 		next = stage2_pgd_addr_end(kvm, addr, end);
 		if (stage2_pgd_present(kvm, *pgd))
-			stage2_wp_puds(kvm, pgd, addr, next);
+			stage2_wp_puds(kvm, pgd, addr, next, wp_ptes);
 	} while (pgd++, addr = next, addr != end);
 }
 
@@ -1535,6 +1541,7 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
 	struct kvm_memslots *slots = kvm_memslots(kvm);
 	struct kvm_memory_slot *memslot = id_to_memslot(slots, slot);
 	phys_addr_t start, end;
+	bool wp_ptes;
 
 	if (WARN_ON_ONCE(!memslot))
 		return;
@@ -1543,7 +1550,14 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
 	end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;
 
 	spin_lock(&kvm->mmu_lock);
-	stage2_wp_range(kvm, start, end);
+	/*
+	 * If we're with initial-all-set, we don't need to write protect
+	 * any small page because they're reported as dirty already.
+	 * However we still need to write-protect huge pages so that the
+	 * page split can happen lazily on the first write to the huge page.
+	 */
+	wp_ptes = !kvm_dirty_log_manual_protect_and_init_set(kvm);
+	stage2_wp_range(kvm, start, end, wp_ptes);
 	spin_unlock(&kvm->mmu_lock);
 	kvm_flush_remote_tlbs(kvm);
 }
@@ -1567,7 +1581,7 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
 	phys_addr_t start = (base_gfn +  __ffs(mask)) << PAGE_SHIFT;
 	phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT;
 
-	stage2_wp_range(kvm, start, end);
+	stage2_wp_range(kvm, start, end, true);
 }
 
 /*
-- 
2.19.1


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2020-03-12  1:45 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2020-03-09  8:57 [RFC] KVM: arm64: support enabling dirty log graually in small chunks Keqian Zhu
2020-03-09 11:45 ` Marc Zyngier
2020-03-10  8:26   ` zhukeqian
2020-03-10 13:16     ` Marc Zyngier
2020-03-11  7:19       ` zhukeqian
2020-03-11  7:34         ` Zhoujian (jay)
2020-03-12  1:45           ` zhukeqian

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).