[PATCH v3 3/4] live migration support for VM dirty log management

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Mario Smarduch <m.smarduch@samsung.com>
To: "kvmarm@lists.cs.columbia.edu" <kvmarm@lists.cs.columbia.edu>,
	"Marc Zyngier" <marc.zyngier@arm.com>,
	"christoffer.dall@linaro.org" <christoffer.dall@linaro.org>,
	이정석 <jays.lee@samsung.com>, 정성진 <sungjinn.chung@samsung.com>,
	gavin.guo@canonical.com,
	"kvm@vger.kernel.org" <kvm@vger.kernel.org>
Subject: [PATCH v3 3/4] live migration support for VM dirty log management
Date: Tue, 22 Apr 2014 16:18:22 -0700	[thread overview]
Message-ID: <5356F8BE.6000502@samsung.com> (raw)


- made kvm_vm_ioctl_get_dirty_log() generic moved to kvm_main.c, deleted arm,x86  versions
- optimized kvm_mmu_write_protected_pt_masked() to skip upper table lookups


Signed-off-by: Mario Smarduch <m.smarduch@samsung.com>
---
 arch/arm/include/asm/kvm_host.h |    3 ++
 arch/arm/kvm/arm.c              |    5 --
 arch/arm/kvm/mmu.c              |   99 +++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/x86.c              |   78 ------------------------------
 virt/kvm/kvm_main.c             |   82 ++++++++++++++++++++++++++++++++
 5 files changed, 184 insertions(+), 83 deletions(-)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 9f827c8..c5c27d8 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -237,5 +237,8 @@ int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value);
 void kvm_tlb_flush_vmid(struct kvm *kvm);
 
 int kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
+void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
+		struct kvm_memory_slot *slot,
+		gfn_t gfn_offset, unsigned long mask);
 
 #endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index b916478..6ca3e84 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -784,11 +784,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 	}
 }
 
-int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
-{
-	return -EINVAL;
-}
-
 static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
 					struct kvm_arm_device_addr *dev_addr)
 {
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 4d029a6..52d4dd6 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -812,6 +812,105 @@ int kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
 	return 0;
 }
 
+/**
+ * kvm_mmu_write_protected_pt_masked - after migration thread write protects
+ *  the entire VM address space itterative call are made to get diry pags
+ *  as the VM pages are being migrated. New dirty pages may be subset
+ *  of initial WPed VM or new writes faulted in. Here write protect new
+ *  dirty pages again in preparation of next dirty log read. This function is
+ *  called as a result KVM_GET_DIRTY_LOG ioctl, to determine what pages
+ *  need to be migrated.
+ *   'kvm->mmu_lock' must be  held to protect against concurrent modification
+ *   of page tables (2nd stage fault, mmu modifiers, ...)
+ *
+ * @kvm:        The KVM pointer
+ * @slot:       The memory slot the dirty log is retrieved for
+ * @gfn_offset: The gfn offset in memory slot
+ * @mask:       The mask of dirty pages at offset 'gnf_offset in this memory
+ *              slot to be writ protect
+ */
+void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
+		struct kvm_memory_slot *slot,
+		gfn_t gfn_offset, unsigned long mask)
+{
+	phys_addr_t ipa, next, offset_ipa;
+	pgd_t *pgdp = kvm->arch.pgd, *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+	gfn_t gfnofst = slot->base_gfn + gfn_offset;
+	bool crosses_pmd;
+
+	ipa = (gfnofst + __ffs(mask)) << PAGE_SHIFT;
+	offset_ipa  = gfnofst << PAGE_SHIFT;
+	next = (gfnofst + (BITS_PER_LONG - 1)) << PAGE_SHIFT;
+
+	/* check if mask width crosses 2nd level page table range, and
+	 * possibly 3rd, 4th. If not skip upper table lookups. Unlikely
+	 * to be true machine memory regions tend to start on atleast PMD
+	 * boundary and mask is a power of 2.
+	 */
+	crosses_pmd = ((offset_ipa & PMD_MASK) ^ (next & PMD_MASK)) ? true :
+									false;
+	/* If pgd, pud, pmd not present and you cross pmd range check next
+	 * index. Unlikely that pgd and pud would be not present. Between
+	 * dirty page marking and now page tables may have been altered.
+	 */
+	pgd = pgdp + pgd_index(ipa);
+	if (unlikely(crosses_pmd && !pgd_present(*pgd))) {
+		pgd = pgdp + pgd_index(next);
+		if (!pgd_present(*pgd))
+			return;
+	}
+
+	pud = pud_offset(pgd, ipa);
+	if (unlikely(crosses_pmd && !pud_present(*pud))) {
+		pud = pud_offset(pgd, next);
+		if (!pud_present(*pud))
+			return;
+	}
+
+	pmd = pmd_offset(pud, ipa);
+	if (unlikely(crosses_pmd && !pmd_present(*pmd))) {
+		pmd = pmd_offset(pud, next);
+		if (!pmd_present(*pmd))
+			return;
+	}
+
+	for (;;) {
+		pte = pte_offset_kernel(pmd, ipa);
+		if (!pte_present(*pte))
+			goto next_ipa;
+
+		if ((*pte & L_PTE_S2_RDWR) == L_PTE_S2_RDONLY)
+			goto next_ipa;
+
+		stage2_mark_pte_ro(pte);
+
+next_ipa:
+		mask &= mask - 1;
+		if (!mask)
+			break;
+
+		/* find next page */
+		ipa = (gfnofst + __ffs(mask)) << PAGE_SHIFT;
+
+		/* skip upper page table lookups */
+		if (!crosses_pmd)
+			continue;
+
+		pgd = pgdp + pgd_index(ipa);
+		if (unlikely(!pgd_present(*pgd)))
+			goto next_ipa;
+		pud = pud_offset(pgd, ipa);
+		if (unlikely(!pud_present(*pud)))
+			goto next_ipa;
+		pmd = pmd_offset(pud, ipa);
+		if (unlikely(!pmd_present(*pmd)))
+			goto next_ipa;
+	}
+}
+
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			  struct kvm_memory_slot *memslot,
 			  unsigned long fault_status)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2b85784..316b655 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3539,84 +3539,6 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
 	return 0;
 }
 
-/**
- * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
- * @kvm: kvm instance
- * @log: slot id and address to which we copy the log
- *
- * We need to keep it in mind that VCPU threads can write to the bitmap
- * concurrently.  So, to avoid losing data, we keep the following order for
- * each bit:
- *
- *   1. Take a snapshot of the bit and clear it if needed.
- *   2. Write protect the corresponding page.
- *   3. Flush TLB's if needed.
- *   4. Copy the snapshot to the userspace.
- *
- * Between 2 and 3, the guest may write to the page using the remaining TLB
- * entry.  This is not a problem because the page will be reported dirty at
- * step 4 using the snapshot taken before and step 3 ensures that successive
- * writes will be logged for the next call.
- */
-int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
-{
-	int r;
-	struct kvm_memory_slot *memslot;
-	unsigned long n, i;
-	unsigned long *dirty_bitmap;
-	unsigned long *dirty_bitmap_buffer;
-	bool is_dirty = false;
-
-	mutex_lock(&kvm->slots_lock);
-
-	r = -EINVAL;
-	if (log->slot >= KVM_USER_MEM_SLOTS)
-		goto out;
-
-	memslot = id_to_memslot(kvm->memslots, log->slot);
-
-	dirty_bitmap = memslot->dirty_bitmap;
-	r = -ENOENT;
-	if (!dirty_bitmap)
-		goto out;
-
-	n = kvm_dirty_bitmap_bytes(memslot);
-
-	dirty_bitmap_buffer = dirty_bitmap + n / sizeof(long);
-	memset(dirty_bitmap_buffer, 0, n);
-
-	spin_lock(&kvm->mmu_lock);
-
-	for (i = 0; i < n / sizeof(long); i++) {
-		unsigned long mask;
-		gfn_t offset;
-
-		if (!dirty_bitmap[i])
-			continue;
-
-		is_dirty = true;
-
-		mask = xchg(&dirty_bitmap[i], 0);
-		dirty_bitmap_buffer[i] = mask;
-
-		offset = i * BITS_PER_LONG;
-		kvm_mmu_write_protect_pt_masked(kvm, memslot, offset, mask);
-	}
-	if (is_dirty)
-		kvm_flush_remote_tlbs(kvm);
-
-	spin_unlock(&kvm->mmu_lock);
-
-	r = -EFAULT;
-	if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
-		goto out;
-
-	r = 0;
-out:
-	mutex_unlock(&kvm->slots_lock);
-	return r;
-}
-
 int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
 			bool line_status)
 {
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 1d11912..879815c 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -432,6 +432,88 @@ static int kvm_init_mmu_notifier(struct kvm *kvm)
 	return mmu_notifier_register(&kvm->mmu_notifier, current->mm);
 }
 
+/**
+ * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
+ * @kvm: kvm instance
+ * @log: slot id and address to which we copy the log
+ *
+ * Shared x86 and ARM.
+ *
+ * We need to keep it in mind that VCPU threads can write to the bitmap
+ * concurrently.  So, to avoid losing data, we keep the following order for
+ * each bit:
+ *
+ *   1. Take a snapshot of the bit and clear it if needed.
+ *   2. Write protect the corresponding page.
+ *   3. Flush TLB's if needed.
+ *   4. Copy the snapshot to the userspace.
+ *
+ * Between 2 and 3, the guest may write to the page using the remaining TLB
+ * entry.  This is not a problem because the page will be reported dirty at
+ * step 4 using the snapshot taken before and step 3 ensures that successive
+ * writes will be logged for the next call.
+ *
+ */
+int __weak kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
+						struct kvm_dirty_log *log)
+{
+	int r;
+	struct kvm_memory_slot *memslot;
+	unsigned long n, i;
+	unsigned long *dirty_bitmap;
+	unsigned long *dirty_bitmap_buffer;
+	bool is_dirty = false;
+
+	mutex_lock(&kvm->slots_lock);
+
+	r = -EINVAL;
+	if (log->slot >= KVM_USER_MEM_SLOTS)
+		goto out;
+
+	memslot = id_to_memslot(kvm->memslots, log->slot);
+
+	dirty_bitmap = memslot->dirty_bitmap;
+	r = -ENOENT;
+	if (!dirty_bitmap)
+		goto out;
+
+	n = kvm_dirty_bitmap_bytes(memslot);
+
+	dirty_bitmap_buffer = dirty_bitmap + n / sizeof(long);
+	memset(dirty_bitmap_buffer, 0, n);
+
+	spin_lock(&kvm->mmu_lock);
+
+	for (i = 0; i < n / sizeof(long); i++) {
+		unsigned long mask;
+		gfn_t offset;
+
+		if (!dirty_bitmap[i])
+			continue;
+
+		is_dirty = true;
+
+		mask = xchg(&dirty_bitmap[i], 0);
+		dirty_bitmap_buffer[i] = mask;
+
+		offset = i * BITS_PER_LONG;
+		kvm_mmu_write_protect_pt_masked(kvm, memslot, offset, mask);
+	}
+	if (is_dirty)
+		kvm_flush_remote_tlbs(kvm);
+
+	spin_unlock(&kvm->mmu_lock);
+
+	r = -EFAULT;
+	if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
+		goto out;
+
+	r = 0;
+out:
+	mutex_unlock(&kvm->slots_lock);
+	return r;
+}
+
 #else  /* !(CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER) */
 
 static int kvm_init_mmu_notifier(struct kvm *kvm)
-- 
1.7.9.5

                 reply	other threads:[~2014-04-22 23:18 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:9f827c8 dfblob:c5c27d8 dfblob:b916478 dfblob:6ca3e84
dfblob:4d029a6 dfblob:52d4dd6 dfblob:2b85784 dfblob:316b655
dfblob:1d11912 dfblob:879815c )
 OR (
bs:"[PATCH v3 3/4] live migration support for VM dirty log management" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5356F8BE.6000502@samsung.com \
    --to=m.smarduch@samsung.com \
    --cc=christoffer.dall@linaro.org \
    --cc=gavin.guo@canonical.com \
    --cc=jays.lee@samsung.com \
    --cc=kvm@vger.kernel.org \
    --cc=kvmarm@lists.cs.columbia.edu \
    --cc=marc.zyngier@arm.com \
    --cc=sungjinn.chung@samsung.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.