public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
From: Mario Smarduch <m.smarduch@samsung.com>
To: "kvmarm@lists.cs.columbia.edu" <kvmarm@lists.cs.columbia.edu>,
	Marc Zyngier <marc.zyngier@arm.com>,
	"christoffer.dall@linaro.org" <christoffer.dall@linaro.org>
Cc: "kvm@vger.kernel.org" <kvm@vger.kernel.org>,
	이정석 <jays.lee@samsung.com>, 정성진 <sungjinn.chung@samsung.com>
Subject: [PATCH 2/5] live migration support for initial write protect of VM
Date: Wed, 16 Apr 2014 18:33:50 -0700	[thread overview]
Message-ID: <534F2F7E.1040100@samsung.com> (raw)


Add support for initial write protection of guest VM, to later manage dirty
pages. Reduced TLB flushing to one flush after memory region is write protected.
This is based on Eric's patch, which applied cleanly. The only patch I found
in the archives was the memory region delete, but still in arm.c.

Signed-off-by: Mario Smarduch <m.smarduch@samsung.com>
---
 arch/arm/include/asm/kvm_host.h |    1 +
 arch/arm/kvm/arm.c              |    4 ++
 arch/arm/kvm/mmu.c              |  125 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 130 insertions(+)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 315e3f3..7ac1fdc 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -229,5 +229,6 @@ u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid);
 int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value);
 
 void kvm_tlb_flush_vm(struct kvm *kvm);
+void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
 
 #endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 9a4bc10..7714cc6 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -249,6 +249,10 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 		unmap_stage2_range(kvm, gpa, size);
 		spin_unlock(&kvm->mmu_lock);
 	}
+
+	/* Request has been issued to migrate the guest, 1st write protect VM */
+	if ((change != KVM_MR_DELETE) && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES))
+		kvm_mmu_slot_remove_write_access(kvm, mem->slot);
 }
 
 void kvm_arch_flush_shadow_all(struct kvm *kvm)
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index d7a1846..b85ab56 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -648,6 +648,131 @@ static bool transparent_hugepage_adjust(pfn_t *pfnp, phys_addr_t *ipap)
 	return false;
 }
 
+/**
+ * split_pmd - splits huge pages into small pages, required to keep a dirty
+ *  log of small memory granules, otherwise huge pages would need to be
+ *  migrated. Practically an idle system has problems migrating with
+ *  huge pages.  Called during WP of entire VM address space, done
+ *  initially when the migration thread issues the KVM_MEM_LOG_DIRTY_PAGES ioctl.
+ *  mmu_lock must be held by the caller.
+ *
+ * @kvm:        The KVM pointer
+ * @pmd:        pmd to 2nd stage huge page
+ * @addr:       Guest Physical Address
+ */
+static bool split_pmd(struct kvm *kvm, pmd_t *pmd, u64 addr)
+{
+	struct page *page;
+	pfn_t pfn = pmd_pfn(*pmd);
+	pte_t *pte, new_pte;
+	int i;
+
+	page = alloc_page(GFP_KERNEL);
+	if (page == NULL)
+		return false;
+
+	pte = page_address(page);
+	/* first break up the huge page into small page pte's */
+	for (i = 0; i < PTRS_PER_PMD; i++) {
+		new_pte = pfn_pte(pfn+i, PAGE_S2);
+		pte[i] = new_pte;
+	}
+	kvm_clean_pte(pte);
+	/* now set the pmd to pte table */
+	pmd_populate_kernel(NULL, pmd, pte);
+
+	get_page(virt_to_page(pte));
+	return true;
+}
+
+
+/**
+ * kvm_mmu_slot_remove_write_access - write protects entire VM address space.
+ *  Called at start of migration when KVM_MEM_LOG_DIRTY_PAGES ioctl is
+ *  issued. After this function returns, all pages are write protected -
+ *  except the ones faulted in while mmu_lock is released; those pages will
+ *  be marked in the dirty log and are not forgotten.
+ *
+ *  Initial VM write protect sweep is required to keep track of dirty pages for
+ *  subsequent memory region dirty log retrieval.
+ *  - mmu_lock is held throughout - protects against concurrent faults, mmu notifier
+ *    invalidate/unmap/update user pte, or direct device write to guest memory
+ *
+ * @kvm:        The KVM pointer
+ * @slot:       The memory slot the dirty log is retrieved for
+ */
+void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte, new_pte;
+	pgd_t *pgdp = kvm->arch.pgd;
+	struct kvm_memory_slot *memslot = id_to_memslot(kvm->memslots, slot);
+	u64 start = memslot->base_gfn << PAGE_SHIFT;
+	u64 end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;
+	u64 addr = start;
+
+	spin_lock(&kvm->mmu_lock);
+	kvm->arch.migration_in_progress = 1;
+	while (addr < end) {
+		/* Relieve contention for mmu_lock. There is no need to flush
+		 * TLBs here. TLB updates will be picked up on TLB refills or
+		 * flush of VM TLBs. The important thing is that after the loop
+		 * terminates all pmds have been split, write protected and visible
+		 */
+		if (need_resched() || spin_needbreak(&kvm->mmu_lock))
+			cond_resched_lock(&kvm->mmu_lock);
+
+		pgd = pgdp + pgd_index(addr);
+		if (!pgd_present(*pgd)) {
+			addr = pgd_addr_end(addr, end);
+			continue;
+		}
+
+		pud = pud_offset(pgd, addr);
+		if (pud_huge(*pud) || !pud_present(*pud)) {
+			addr = pud_addr_end(addr, end);
+			continue;
+		}
+
+		pmd = pmd_offset(pud, addr);
+		if (!pmd_present(*pmd)) {
+			addr = pmd_addr_end(addr, end);
+			continue;
+		}
+		if (kvm_pmd_huge(*pmd)) {
+			if (!split_pmd(kvm, pmd, addr)) {
+				/* Should fail migration here, but return from
+				 * here is not reflected in user space. The
+				 * status is detected on first dirty log
+				 * retrieval, where you cause migration abort
+				 * (user space aborts).
+				 */
+				kvm->arch.migration_in_progress = -1;
+				spin_unlock(&kvm->mmu_lock);
+				return;
+			}
+			addr = pmd_addr_end(addr, end);
+			continue;
+		}
+		pte = pte_offset_kernel(pmd, addr);
+		addr += PAGE_SIZE;
+		if (!pte_present(*pte))
+			continue;
+
+		/* Skip write protected or read only pages */
+		if ((*pte & L_PTE_S2_RDWR) == L_PTE_S2_RDONLY)
+			continue;
+
+		new_pte = pfn_pte(pte_pfn(*pte), PAGE_S2);
+		*pte = new_pte;
+	}
+	/* Flush VM TLBs */
+	kvm_tlb_flush_vm(kvm);
+	spin_unlock(&kvm->mmu_lock);
+}
+
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			  struct kvm_memory_slot *memslot,
 			  unsigned long fault_status)
-- 
1.7.9.5


                 reply	other threads:[~2014-04-17  1:33 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=534F2F7E.1040100@samsung.com \
    --to=m.smarduch@samsung.com \
    --cc=christoffer.dall@linaro.org \
    --cc=jays.lee@samsung.com \
    --cc=kvm@vger.kernel.org \
    --cc=kvmarm@lists.cs.columbia.edu \
    --cc=marc.zyngier@arm.com \
    --cc=sungjinn.chung@samsung.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox