All of lore.kernel.org
 help / color / mirror / Atom feed
From: m.smarduch@samsung.com (Mario Smarduch)
To: linux-arm-kernel@lists.infradead.org
Subject: [PATCH v4 3/5] live migration support for VM dirty log management
Date: Mon, 28 Apr 2014 17:55:01 -0700	[thread overview]
Message-ID: <535EF865.2040702@samsung.com> (raw)


This patch adds support for keeping track of VM dirty pages, by updating 
per memslot dirty bitmap and write protecting the page again.


Signed-off-by: Mario Smarduch <m.smarduch@samsung.com>
---
 arch/arm/include/asm/kvm_host.h |    3 ++
 arch/arm/kvm/arm.c              |    5 --
 arch/arm/kvm/mmu.c              |  101 +++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/x86.c              |   78 ------------------------------
 virt/kvm/kvm_main.c             |   84 ++++++++++++++++++++++++++++++++
 5 files changed, 188 insertions(+), 83 deletions(-)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 9f827c8..c5c27d8 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -237,5 +237,8 @@ int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value);
 void kvm_tlb_flush_vmid(struct kvm *kvm);
 
 int kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
+void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
+		struct kvm_memory_slot *slot,
+		gfn_t gfn_offset, unsigned long mask);
 
 #endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index b916478..6ca3e84 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -784,11 +784,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 	}
 }
 
-int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
-{
-	return -EINVAL;
-}
-
 static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
 					struct kvm_arm_device_addr *dev_addr)
 {
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 15bbca2..3442594 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -864,6 +864,107 @@ out:
 	return ret;
 }
 
+
+/**
+ * kvm_mmu_write_protected_pt_masked - after migration thread write protects
+ *  the entire VM address space itterative call are made to get diry pags
+ *  as the VM pages are being migrated. New dirty pages may be subset
+ *  of initial WPed VM or new writes faulted in. Here write protect new
+ *  dirty pages again in preparation of next dirty log read. This function is
+ *  called as a result KVM_GET_DIRTY_LOG ioctl, to determine what pages
+ *  need to be migrated.
+ *   'kvm->mmu_lock' must be  held to protect against concurrent modification
+ *   of page tables (2nd stage fault, mmu modifiers, ...)
+ *
+ * @kvm:        The KVM pointer
+ * @slot:       The memory slot the dirty log is retrieved for
+ * @gfn_offset: The gfn offset in memory slot
+ * @mask:       The mask of dirty pages at offset 'gnf_offset in this memory
+ *              slot to be writ protect
+ */
+
+void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
+		struct kvm_memory_slot *slot,
+		gfn_t gfn_offset, unsigned long mask)
+{
+	phys_addr_t ipa, next, offset_ipa;
+	pgd_t *pgdp = kvm->arch.pgd, *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+	gfn_t gfnofst = slot->base_gfn + gfn_offset;
+	bool crosses_pmd;
+
+	ipa = (gfnofst + __ffs(mask)) << PAGE_SHIFT;
+	offset_ipa  = gfnofst << PAGE_SHIFT;
+	next = (gfnofst + (BITS_PER_LONG - 1)) << PAGE_SHIFT;
+
+	/* check if mask width crosses 2nd level page table range, and
+	 * possibly 3rd, 4th. If not skip upper table lookups. Unlikely
+	 * to be true machine memory regions tend to start on atleast PMD
+	 * boundary and mask is a power of 2.
+	 */
+	crosses_pmd = ((offset_ipa & PMD_MASK) ^ (next & PMD_MASK)) ? true :
+									false;
+
+	/* If pgd, pud, pmd not present and you cross pmd range check next
+	 * index. Unlikely that pgd and pud would be not present. Between
+	 * dirty page marking and now page tables may have been altered.
+	 */
+	pgd = pgdp + pgd_index(ipa);
+	if (unlikely(crosses_pmd && !pgd_present(*pgd))) {
+		pgd = pgdp + pgd_index(next);
+		if (!pgd_present(*pgd))
+			return;
+	}
+
+	pud = pud_offset(pgd, ipa);
+	if (unlikely(crosses_pmd && !pud_present(*pud))) {
+		pud = pud_offset(pgd, next);
+		if (!pud_present(*pud))
+			return;
+	}
+
+	pmd = pmd_offset(pud, ipa);
+	if (unlikely(crosses_pmd && !pmd_present(*pmd))) {
+		pmd = pmd_offset(pud, next);
+		if (!pmd_present(*pmd))
+			return;
+	}
+
+	for (;;) {
+		pte = pte_offset_kernel(pmd, ipa);
+		if (!pte_present(*pte))
+			goto next_ipa;
+
+		if (kvm_s2pte_readonly(pte))
+			goto next_ipa;
+		kvm_set_s2pte_readonly(pte);
+next_ipa:
+		mask &= mask - 1;
+		if (!mask)
+			break;
+
+		/* find next page */
+		ipa = (gfnofst + __ffs(mask)) << PAGE_SHIFT;
+
+		/* skip upper page table lookups */
+		if (!crosses_pmd)
+			continue;
+
+		pgd = pgdp + pgd_index(ipa);
+		if (unlikely(!pgd_present(*pgd)))
+			goto next_ipa;
+		pud = pud_offset(pgd, ipa);
+		if (unlikely(!pud_present(*pud)))
+			goto next_ipa;
+		pmd = pmd_offset(pud, ipa);
+		if (unlikely(!pmd_present(*pmd)))
+			goto next_ipa;
+	}
+}
+
+
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			  struct kvm_memory_slot *memslot,
 			  unsigned long fault_status)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2b85784..316b655 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3539,84 +3539,6 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
 	return 0;
 }
 
-/**
- * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
- * @kvm: kvm instance
- * @log: slot id and address to which we copy the log
- *
- * We need to keep it in mind that VCPU threads can write to the bitmap
- * concurrently.  So, to avoid losing data, we keep the following order for
- * each bit:
- *
- *   1. Take a snapshot of the bit and clear it if needed.
- *   2. Write protect the corresponding page.
- *   3. Flush TLB's if needed.
- *   4. Copy the snapshot to the userspace.
- *
- * Between 2 and 3, the guest may write to the page using the remaining TLB
- * entry.  This is not a problem because the page will be reported dirty at
- * step 4 using the snapshot taken before and step 3 ensures that successive
- * writes will be logged for the next call.
- */
-int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
-{
-	int r;
-	struct kvm_memory_slot *memslot;
-	unsigned long n, i;
-	unsigned long *dirty_bitmap;
-	unsigned long *dirty_bitmap_buffer;
-	bool is_dirty = false;
-
-	mutex_lock(&kvm->slots_lock);
-
-	r = -EINVAL;
-	if (log->slot >= KVM_USER_MEM_SLOTS)
-		goto out;
-
-	memslot = id_to_memslot(kvm->memslots, log->slot);
-
-	dirty_bitmap = memslot->dirty_bitmap;
-	r = -ENOENT;
-	if (!dirty_bitmap)
-		goto out;
-
-	n = kvm_dirty_bitmap_bytes(memslot);
-
-	dirty_bitmap_buffer = dirty_bitmap + n / sizeof(long);
-	memset(dirty_bitmap_buffer, 0, n);
-
-	spin_lock(&kvm->mmu_lock);
-
-	for (i = 0; i < n / sizeof(long); i++) {
-		unsigned long mask;
-		gfn_t offset;
-
-		if (!dirty_bitmap[i])
-			continue;
-
-		is_dirty = true;
-
-		mask = xchg(&dirty_bitmap[i], 0);
-		dirty_bitmap_buffer[i] = mask;
-
-		offset = i * BITS_PER_LONG;
-		kvm_mmu_write_protect_pt_masked(kvm, memslot, offset, mask);
-	}
-	if (is_dirty)
-		kvm_flush_remote_tlbs(kvm);
-
-	spin_unlock(&kvm->mmu_lock);
-
-	r = -EFAULT;
-	if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
-		goto out;
-
-	r = 0;
-out:
-	mutex_unlock(&kvm->slots_lock);
-	return r;
-}
-
 int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
 			bool line_status)
 {
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 1d11912..5aafeb8 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -432,6 +432,90 @@ static int kvm_init_mmu_notifier(struct kvm *kvm)
 	return mmu_notifier_register(&kvm->mmu_notifier, current->mm);
 }
 
+/**
+ * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
+ * @kvm: kvm instance
+ * @log: slot id and address to which we copy the log
+ *
+ * Shared by x86 and ARM.
+ *
+ * We need to keep it in mind that VCPU threads can write to the bitmap
+ * concurrently.  So, to avoid losing data, we keep the following order for
+ * each bit:
+ *
+ *   1. Take a snapshot of the bit and clear it if needed.
+ *   2. Write protect the corresponding page.
+ *   3. Flush TLB's if needed.
+ *   4. Copy the snapshot to the userspace.
+ *
+ * Between 2 and 3, the guest may write to the page using the remaining TLB
+ * entry.  This is not a problem because the page will be reported dirty at
+ * step 4 using the snapshot taken before and step 3 ensures that successive
+ * writes will be logged for the next call.
+ */
+
+int __weak kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
+						struct kvm_dirty_log *log)
+{
+	int r;
+	struct kvm_memory_slot *memslot;
+	unsigned long n, i;
+	unsigned long *dirty_bitmap;
+	unsigned long *dirty_bitmap_buffer;
+	bool is_dirty = false;
+
+	mutex_lock(&kvm->slots_lock);
+
+	r = -EINVAL;
+	if (log->slot >= KVM_USER_MEM_SLOTS)
+		goto out;
+
+	memslot = id_to_memslot(kvm->memslots, log->slot);
+
+	dirty_bitmap = memslot->dirty_bitmap;
+	r = -ENOENT;
+	if (!dirty_bitmap)
+		goto out;
+
+	n = kvm_dirty_bitmap_bytes(memslot);
+
+	dirty_bitmap_buffer = dirty_bitmap + n / sizeof(long);
+	memset(dirty_bitmap_buffer, 0, n);
+
+	spin_lock(&kvm->mmu_lock);
+
+	for (i = 0; i < n / sizeof(long); i++) {
+		unsigned long mask;
+		gfn_t offset;
+
+		if (!dirty_bitmap[i])
+			continue;
+
+		is_dirty = true;
+
+		mask = xchg(&dirty_bitmap[i], 0);
+		dirty_bitmap_buffer[i] = mask;
+
+		offset = i * BITS_PER_LONG;
+		kvm_mmu_write_protect_pt_masked(kvm, memslot, offset, mask);
+	}
+	if (is_dirty)
+		kvm_flush_remote_tlbs(kvm);
+
+	spin_unlock(&kvm->mmu_lock);
+
+	r = -EFAULT;
+	if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
+		goto out;
+
+	r = 0;
+out:
+	mutex_unlock(&kvm->slots_lock);
+	return r;
+}
+
+
+
 #else  /* !(CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER) */
 
 static int kvm_init_mmu_notifier(struct kvm *kvm)
-- 
1.7.9.5

WARNING: multiple messages have this Message-ID (diff)
From: Mario Smarduch <m.smarduch@samsung.com>
To: "kvmarm@lists.cs.columbia.edu" <kvmarm@lists.cs.columbia.edu>,
	Marc Zyngier <marc.zyngier@arm.com>,
	"christoffer.dall@linaro.org" <christoffer.dall@linaro.org>,
	Steve Capper <steve.capper@arm.com>
Cc: "kvm@vger.kernel.org" <kvm@vger.kernel.org>,
	linux-arm-kernel@lists.infradead.org,
	"gavin.guo@canonical.com" <gavin.guo@canonical.com>,
	"Peter Maydell" <peter.maydell@linaro.org>,
	이정석 <jays.lee@samsung.com>, 정성진 <sungjinn.chung@samsung.com>
Subject: [PATCH v4 3/5] live migration support for VM dirty log management
Date: Mon, 28 Apr 2014 17:55:01 -0700	[thread overview]
Message-ID: <535EF865.2040702@samsung.com> (raw)


This patch adds support for keeping track of VM dirty pages, by updating 
per memslot dirty bitmap and write protecting the page again.


Signed-off-by: Mario Smarduch <m.smarduch@samsung.com>
---
 arch/arm/include/asm/kvm_host.h |    3 ++
 arch/arm/kvm/arm.c              |    5 --
 arch/arm/kvm/mmu.c              |  101 +++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/x86.c              |   78 ------------------------------
 virt/kvm/kvm_main.c             |   84 ++++++++++++++++++++++++++++++++
 5 files changed, 188 insertions(+), 83 deletions(-)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 9f827c8..c5c27d8 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -237,5 +237,8 @@ int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value);
 void kvm_tlb_flush_vmid(struct kvm *kvm);
 
 int kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
+void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
+		struct kvm_memory_slot *slot,
+		gfn_t gfn_offset, unsigned long mask);
 
 #endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index b916478..6ca3e84 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -784,11 +784,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 	}
 }
 
-int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
-{
-	return -EINVAL;
-}
-
 static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
 					struct kvm_arm_device_addr *dev_addr)
 {
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 15bbca2..3442594 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -864,6 +864,107 @@ out:
 	return ret;
 }
 
+
+/**
+ * kvm_mmu_write_protected_pt_masked - after migration thread write protects
+ *  the entire VM address space itterative call are made to get diry pags
+ *  as the VM pages are being migrated. New dirty pages may be subset
+ *  of initial WPed VM or new writes faulted in. Here write protect new
+ *  dirty pages again in preparation of next dirty log read. This function is
+ *  called as a result KVM_GET_DIRTY_LOG ioctl, to determine what pages
+ *  need to be migrated.
+ *   'kvm->mmu_lock' must be  held to protect against concurrent modification
+ *   of page tables (2nd stage fault, mmu modifiers, ...)
+ *
+ * @kvm:        The KVM pointer
+ * @slot:       The memory slot the dirty log is retrieved for
+ * @gfn_offset: The gfn offset in memory slot
+ * @mask:       The mask of dirty pages at offset 'gnf_offset in this memory
+ *              slot to be writ protect
+ */
+
+void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
+		struct kvm_memory_slot *slot,
+		gfn_t gfn_offset, unsigned long mask)
+{
+	phys_addr_t ipa, next, offset_ipa;
+	pgd_t *pgdp = kvm->arch.pgd, *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+	gfn_t gfnofst = slot->base_gfn + gfn_offset;
+	bool crosses_pmd;
+
+	ipa = (gfnofst + __ffs(mask)) << PAGE_SHIFT;
+	offset_ipa  = gfnofst << PAGE_SHIFT;
+	next = (gfnofst + (BITS_PER_LONG - 1)) << PAGE_SHIFT;
+
+	/* check if mask width crosses 2nd level page table range, and
+	 * possibly 3rd, 4th. If not skip upper table lookups. Unlikely
+	 * to be true machine memory regions tend to start on atleast PMD
+	 * boundary and mask is a power of 2.
+	 */
+	crosses_pmd = ((offset_ipa & PMD_MASK) ^ (next & PMD_MASK)) ? true :
+									false;
+
+	/* If pgd, pud, pmd not present and you cross pmd range check next
+	 * index. Unlikely that pgd and pud would be not present. Between
+	 * dirty page marking and now page tables may have been altered.
+	 */
+	pgd = pgdp + pgd_index(ipa);
+	if (unlikely(crosses_pmd && !pgd_present(*pgd))) {
+		pgd = pgdp + pgd_index(next);
+		if (!pgd_present(*pgd))
+			return;
+	}
+
+	pud = pud_offset(pgd, ipa);
+	if (unlikely(crosses_pmd && !pud_present(*pud))) {
+		pud = pud_offset(pgd, next);
+		if (!pud_present(*pud))
+			return;
+	}
+
+	pmd = pmd_offset(pud, ipa);
+	if (unlikely(crosses_pmd && !pmd_present(*pmd))) {
+		pmd = pmd_offset(pud, next);
+		if (!pmd_present(*pmd))
+			return;
+	}
+
+	for (;;) {
+		pte = pte_offset_kernel(pmd, ipa);
+		if (!pte_present(*pte))
+			goto next_ipa;
+
+		if (kvm_s2pte_readonly(pte))
+			goto next_ipa;
+		kvm_set_s2pte_readonly(pte);
+next_ipa:
+		mask &= mask - 1;
+		if (!mask)
+			break;
+
+		/* find next page */
+		ipa = (gfnofst + __ffs(mask)) << PAGE_SHIFT;
+
+		/* skip upper page table lookups */
+		if (!crosses_pmd)
+			continue;
+
+		pgd = pgdp + pgd_index(ipa);
+		if (unlikely(!pgd_present(*pgd)))
+			goto next_ipa;
+		pud = pud_offset(pgd, ipa);
+		if (unlikely(!pud_present(*pud)))
+			goto next_ipa;
+		pmd = pmd_offset(pud, ipa);
+		if (unlikely(!pmd_present(*pmd)))
+			goto next_ipa;
+	}
+}
+
+
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			  struct kvm_memory_slot *memslot,
 			  unsigned long fault_status)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2b85784..316b655 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3539,84 +3539,6 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
 	return 0;
 }
 
-/**
- * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
- * @kvm: kvm instance
- * @log: slot id and address to which we copy the log
- *
- * We need to keep it in mind that VCPU threads can write to the bitmap
- * concurrently.  So, to avoid losing data, we keep the following order for
- * each bit:
- *
- *   1. Take a snapshot of the bit and clear it if needed.
- *   2. Write protect the corresponding page.
- *   3. Flush TLB's if needed.
- *   4. Copy the snapshot to the userspace.
- *
- * Between 2 and 3, the guest may write to the page using the remaining TLB
- * entry.  This is not a problem because the page will be reported dirty at
- * step 4 using the snapshot taken before and step 3 ensures that successive
- * writes will be logged for the next call.
- */
-int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
-{
-	int r;
-	struct kvm_memory_slot *memslot;
-	unsigned long n, i;
-	unsigned long *dirty_bitmap;
-	unsigned long *dirty_bitmap_buffer;
-	bool is_dirty = false;
-
-	mutex_lock(&kvm->slots_lock);
-
-	r = -EINVAL;
-	if (log->slot >= KVM_USER_MEM_SLOTS)
-		goto out;
-
-	memslot = id_to_memslot(kvm->memslots, log->slot);
-
-	dirty_bitmap = memslot->dirty_bitmap;
-	r = -ENOENT;
-	if (!dirty_bitmap)
-		goto out;
-
-	n = kvm_dirty_bitmap_bytes(memslot);
-
-	dirty_bitmap_buffer = dirty_bitmap + n / sizeof(long);
-	memset(dirty_bitmap_buffer, 0, n);
-
-	spin_lock(&kvm->mmu_lock);
-
-	for (i = 0; i < n / sizeof(long); i++) {
-		unsigned long mask;
-		gfn_t offset;
-
-		if (!dirty_bitmap[i])
-			continue;
-
-		is_dirty = true;
-
-		mask = xchg(&dirty_bitmap[i], 0);
-		dirty_bitmap_buffer[i] = mask;
-
-		offset = i * BITS_PER_LONG;
-		kvm_mmu_write_protect_pt_masked(kvm, memslot, offset, mask);
-	}
-	if (is_dirty)
-		kvm_flush_remote_tlbs(kvm);
-
-	spin_unlock(&kvm->mmu_lock);
-
-	r = -EFAULT;
-	if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
-		goto out;
-
-	r = 0;
-out:
-	mutex_unlock(&kvm->slots_lock);
-	return r;
-}
-
 int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
 			bool line_status)
 {
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 1d11912..5aafeb8 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -432,6 +432,90 @@ static int kvm_init_mmu_notifier(struct kvm *kvm)
 	return mmu_notifier_register(&kvm->mmu_notifier, current->mm);
 }
 
+/**
+ * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
+ * @kvm: kvm instance
+ * @log: slot id and address to which we copy the log
+ *
+ * Shared by x86 and ARM.
+ *
+ * We need to keep it in mind that VCPU threads can write to the bitmap
+ * concurrently.  So, to avoid losing data, we keep the following order for
+ * each bit:
+ *
+ *   1. Take a snapshot of the bit and clear it if needed.
+ *   2. Write protect the corresponding page.
+ *   3. Flush TLB's if needed.
+ *   4. Copy the snapshot to the userspace.
+ *
+ * Between 2 and 3, the guest may write to the page using the remaining TLB
+ * entry.  This is not a problem because the page will be reported dirty at
+ * step 4 using the snapshot taken before and step 3 ensures that successive
+ * writes will be logged for the next call.
+ */
+
+int __weak kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
+						struct kvm_dirty_log *log)
+{
+	int r;
+	struct kvm_memory_slot *memslot;
+	unsigned long n, i;
+	unsigned long *dirty_bitmap;
+	unsigned long *dirty_bitmap_buffer;
+	bool is_dirty = false;
+
+	mutex_lock(&kvm->slots_lock);
+
+	r = -EINVAL;
+	if (log->slot >= KVM_USER_MEM_SLOTS)
+		goto out;
+
+	memslot = id_to_memslot(kvm->memslots, log->slot);
+
+	dirty_bitmap = memslot->dirty_bitmap;
+	r = -ENOENT;
+	if (!dirty_bitmap)
+		goto out;
+
+	n = kvm_dirty_bitmap_bytes(memslot);
+
+	dirty_bitmap_buffer = dirty_bitmap + n / sizeof(long);
+	memset(dirty_bitmap_buffer, 0, n);
+
+	spin_lock(&kvm->mmu_lock);
+
+	for (i = 0; i < n / sizeof(long); i++) {
+		unsigned long mask;
+		gfn_t offset;
+
+		if (!dirty_bitmap[i])
+			continue;
+
+		is_dirty = true;
+
+		mask = xchg(&dirty_bitmap[i], 0);
+		dirty_bitmap_buffer[i] = mask;
+
+		offset = i * BITS_PER_LONG;
+		kvm_mmu_write_protect_pt_masked(kvm, memslot, offset, mask);
+	}
+	if (is_dirty)
+		kvm_flush_remote_tlbs(kvm);
+
+	spin_unlock(&kvm->mmu_lock);
+
+	r = -EFAULT;
+	if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
+		goto out;
+
+	r = 0;
+out:
+	mutex_unlock(&kvm->slots_lock);
+	return r;
+}
+
+
+
 #else  /* !(CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER) */
 
 static int kvm_init_mmu_notifier(struct kvm *kvm)
-- 
1.7.9.5


             reply	other threads:[~2014-04-29  0:55 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-04-29  0:55 Mario Smarduch [this message]
2014-04-29  0:55 ` [PATCH v4 3/5] live migration support for VM dirty log management Mario Smarduch

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=535EF865.2040702@samsung.com \
    --to=m.smarduch@samsung.com \
    --cc=linux-arm-kernel@lists.infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.