[PATCH 2/3] migration dirtybitmap support ARMv7

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Mario Smarduch <m.smarduch@samsung.com>
To: kvmarm@lists.cs.columbia.edu, christoffer.dall@linaro.org,
	marc.zyngier@arm.com, 이정석 <jays.lee@samsung.com>,
	정성진 <sungjinn.chung@samsung.com>
Cc: kvm@vger.kernel.org
Subject: [PATCH 2/3] migration dirtybitmap support ARMv7
Date: Mon, 14 Apr 2014 18:24:23 -0700	[thread overview]
Message-ID: <534C8A47.2050401@samsung.com> (raw)


- Support write protection of entire VM address space
- Split pmds section in migration mode
- Write protect dirty pages on Dirty log read 

Signed-off-by: Mario Smarduch <m.smarduch@samsung.com>
---
 arch/arm/kvm/mmu.c |  158 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 156 insertions(+), 2 deletions(-)

diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 7789857..502e776 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -56,6 +56,13 @@ static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
 		kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
 }
 
+void kvm_tlb_flush_vmid(struct kvm *kvm)
+{
+	phys_addr_t x;
+	/* based on function description 2nd argument is irrelevent */
+	kvm_tlb_flush_vmid_ipa(kvm, x);
+}
+
 static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
 				  int min, int max)
 {
@@ -639,6 +646,143 @@ static bool transparent_hugepage_adjust(pfn_t *pfnp, phys_addr_t *ipap)
 	return false;
 }
 
+/*
+ * Called when QEMU retrieves the dirty log and write protects dirty pages
+ * for next QEMU call to retrieve the dirty logn
+ */
+void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
+		struct kvm_memory_slot *slot,
+		gfn_t gfn_offset, unsigned long mask)
+{
+	phys_addr_t ipa;
+	pgd_t *pgdp = kvm->arch.pgd, *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte, new_pte;
+
+	while (mask) {
+		ipa = (slot->base_gfn + gfn_offset + __ffs(mask)) << PAGE_SHIFT;
+		pgd = pgdp + pgd_index(ipa);
+		if (!pgd_present(*pgd))
+			goto update_mask;
+		pud = pud_offset(pgd, ipa);
+		if (!pud_present(*pud))
+			goto update_mask;
+		pmd = pmd_offset(pud, ipa);
+		if (!pmd_present(*pmd))
+			goto update_mask;
+		pte = pte_offset_kernel(pmd, ipa);
+		if (!pte_present(*pte))
+			goto update_mask;
+		if ((*pte & L_PTE_S2_RDWR) == L_PTE_S2_RDONLY)
+			goto update_mask;
+		new_pte = pfn_pte(pte_pfn(*pte), PAGE_S2);
+		*pte = new_pte;
+update_mask:
+		mask &= mask - 1;
+	}
+}
+
+/*
+ * In migration splits PMDs into PTEs to keep track of dirty pages. Without
+ * spliting light execution prevents migration.
+ */
+bool split_pmd(struct kvm *kvm, pmd_t *pmd, u64 addr)
+{
+	struct page *page;
+	pfn_t pfn = pmd_pfn(*pmd);
+	pte_t *pte, new_pte;
+	int i;
+
+	page = alloc_page(GFP_KERNEL);
+	if (page == NULL)
+		return false;
+
+	pte = page_address(page);
+	for (i = 0; i < PMD_SIZE/PAGE_SIZE; i++) {
+		new_pte = pfn_pte(pfn+i, PAGE_S2);
+		pte[i] = new_pte;
+	}
+	kvm_clean_pte(pte);
+	pmd_populate_kernel(NULL, pmd, pte);
+
+	/*
+	* flush the whole TLB for VM  relying on hardware broadcast
+	*/
+	kvm_tlb_flush_vmid(kvm);
+	get_page(virt_to_page(pte));
+	return true;
+}
+
+/*
+ * Called from QEMU when migration dirty logging is started. Write the protect
+ * current set. Future faults writes are tracked through WP of when dirty log
+ * log.
+ */
+
+void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte, new_pte;
+	pgd_t *pgdp = kvm->arch.pgd;
+	struct kvm_memory_slot *memslot = id_to_memslot(kvm->memslots, slot);
+	u64 start = memslot->base_gfn << PAGE_SHIFT;
+	u64 end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;
+	u64 addr = start, addr1;
+
+	spin_lock(&kvm->mmu_lock);
+	kvm->arch.migration_in_progress = true;
+	while (addr < end) {
+		if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
+			kvm_tlb_flush_vmid(kvm);
+			cond_resched_lock(&kvm->mmu_lock);
+		}
+
+		pgd = pgdp + pgd_index(addr);
+		if (!pgd_present(*pgd)) {
+			addr = pgd_addr_end(addr, end);
+			continue;
+		}
+
+		pud = pud_offset(pgd, addr);
+		if (pud_huge(*pud) || !pud_present(*pud)) {
+			addr = pud_addr_end(addr, end);
+			continue;
+		}
+
+		pmd = pmd_offset(pud, addr);
+		if (!pmd_present(*pmd)) {
+			addr = pmd_addr_end(addr, end);
+			continue;
+		}
+
+		if (kvm_pmd_huge(*pmd)) {
+			if (!split_pmd(kvm, pmd, addr)) {
+				kvm->arch.migration_in_progress = false;
+				return;
+			}
+			addr = pmd_addr_end(addr, end);
+			continue;
+		}
+
+		pte = pte_offset_kernel(pmd, addr);
+		addr1 = addr;
+		addr += PAGE_SIZE;
+		if (!pte_present(*pte))
+			continue;
+
+		if ((*pte & L_PTE_S2_RDWR) == L_PTE_S2_RDONLY)
+			continue;
+
+		new_pte = pfn_pte(pte_pfn(*pte), PAGE_S2);
+		*pte = new_pte;
+	}
+	kvm_tlb_flush_vmid(kvm);
+	spin_unlock(&kvm->mmu_lock);
+}
+
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			  struct kvm_memory_slot *memslot,
 			  unsigned long fault_status)
@@ -652,6 +796,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
 	struct vm_area_struct *vma;
 	pfn_t pfn;
+	bool migration_active = vcpu->kvm->arch.migration_in_progress;
 
 	write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu));
 	if (fault_status == FSC_PERM && !write_fault) {
@@ -705,10 +850,11 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	spin_lock(&kvm->mmu_lock);
 	if (mmu_notifier_retry(kvm, mmu_seq))
 		goto out_unlock;
-	if (!hugetlb && !force_pte)
+	/* During migration don't rebuild huge pages */
+	if (!hugetlb && !force_pte && !migration_active)
 		hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa);
 
-	if (hugetlb) {
+	if (!migration_active && hugetlb) {
 		pmd_t new_pmd = pfn_pmd(pfn, PAGE_S2);
 		new_pmd = pmd_mkhuge(new_pmd);
 		if (writable) {
@@ -720,6 +866,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	} else {
 		pte_t new_pte = pfn_pte(pfn, PAGE_S2);
 		if (writable) {
+			if (migration_active && hugetlb) {
+				/* get back pfn from fault_ipa */
+				pfn += (fault_ipa >> PAGE_SHIFT) &
+					((1 << (PMD_SHIFT - PAGE_SHIFT))-1);
+				new_pte = pfn_pte(pfn, PAGE_S2);
+			}
 			kvm_set_s2pte_writable(&new_pte);
 			kvm_set_pfn_dirty(pfn);
 		}
@@ -727,6 +879,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, false);
 	}
 
+	if (writable)
+		mark_page_dirty(kvm, gfn);
 
 out_unlock:
 	spin_unlock(&kvm->mmu_lock);
-- 
1.7.9.5

next             reply	other threads:[~2014-04-15  1:24 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-04-15  1:24 Mario Smarduch [this message]
2014-04-15  8:58 ` [PATCH 2/3] migration dirtybitmap support ARMv7 Marc Zyngier
2014-04-16  1:18   ` Mario Smarduch

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:7789857 dfblob:502e776 )
 OR (
bs:"[PATCH 2/3] migration dirtybitmap support ARMv7" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=534C8A47.2050401@samsung.com \
    --to=m.smarduch@samsung.com \
    --cc=christoffer.dall@linaro.org \
    --cc=jays.lee@samsung.com \
    --cc=kvm@vger.kernel.org \
    --cc=kvmarm@lists.cs.columbia.edu \
    --cc=marc.zyngier@arm.com \
    --cc=sungjinn.chung@samsung.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.