All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andrea Arcangeli <andrea-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
To: Avi Kivity <avi-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
Cc: Marcelo Tosatti <marcelo-Bw31MaZKKs3YtjvyW6yDsg@public.gmane.org>,
	kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org
Subject: Re: KVM swapping with mmu notifiers
Date: Mon, 14 Jan 2008 18:44:47 +0100	[thread overview]
Message-ID: <20080114174447.GA30812@v2.random> (raw)
In-Reply-To: <478B833E.1020801-atKUWr5tajBWk0Htik3J/w@public.gmane.org>

On Mon, Jan 14, 2008 at 05:43:58PM +0200, Avi Kivity wrote:
> heavy handed.  Maybe it can be fixed in some clever way with rcu or with a 
> rwlock around the memory slot map.

Ok, first the alias looked superflous so I just dropped it (the whole
point of the alias is to never call get_user_pages in the aliased hva,
so the notifiers should never trigger in those alias gfn ranges).

And the code was doing hva->gfn->rmap, while I'm now doing hva->rmap
directly to avoid two loops over the memslot instead of only one. To
serialize hva_to_rmapp mmap_sem is taken (in any mode) or the mmu_lock
is taken, and it's enough to serialize the insertion with the mmu_lock
and setting userspace_addr to 0 before insertion if the buffer isn't
allocated by userland. Setting userspace_addr atomically will make the
memslot visible.

This is a new version of the kvm.git patch.

Please Marcelo let me know if you see more problems, thanks a lot for
the review!

Signed-off-by: Andrea Arcangeli <andrea-atKUWr5tajBWk0Htik3J/w@public.gmane.org>

diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 4086080..c527d7d 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -18,6 +18,7 @@ config KVM
 	tristate "Kernel-based Virtual Machine (KVM) support"
 	depends on ARCH_SUPPORTS_KVM && EXPERIMENTAL
 	select PREEMPT_NOTIFIERS
+	select MMU_NOTIFIER
 	select ANON_INODES
 	---help---
 	  Support hosting fully virtualized guest machines using hardware
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 324ff9a..189f3e1 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -532,6 +532,38 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
 		kvm_flush_remote_tlbs(kvm);
 }
 
+static void unmap_spte(struct kvm *kvm, u64 *spte)
+{
+	struct page *page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
+	get_page(page);
+	rmap_remove(kvm, spte);
+	set_shadow_pte(spte, shadow_trap_nonpresent_pte);
+	kvm_flush_remote_tlbs(kvm);
+	__free_page(page);
+}
+
+void kvm_rmap_unmap_hva(struct kvm *kvm, unsigned long hva)
+{
+	unsigned long *rmapp;
+	u64 *spte, *curr_spte;
+
+	spin_lock(&kvm->mmu_lock);
+	rmapp = kvm_hva_to_rmapp(kvm, hva);
+	if (!rmapp)
+		goto out_unlock;
+
+	spte = rmap_next(kvm, rmapp, NULL);
+	while (spte) {
+		BUG_ON(!(*spte & PT_PRESENT_MASK));
+		rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte);
+		curr_spte = spte;
+		spte = rmap_next(kvm, rmapp, spte);
+		unmap_spte(kvm, curr_spte);
+	}
+out_unlock:
+	spin_unlock(&kvm->mmu_lock);
+}
+
 #ifdef MMU_DEBUG
 static int is_empty_shadow_page(u64 *spt)
 {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8a90403..271ce12 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3159,6 +3159,33 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 	free_page((unsigned long)vcpu->arch.pio_data);
 }
 
+static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn)
+{
+	return container_of(mn, struct kvm, mmu_notifier);
+}
+
+void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
+				      struct mm_struct *mm,
+				      unsigned long address)
+{
+	struct kvm *kvm = mmu_notifier_to_kvm(mn);
+	BUG_ON(mm != kvm->mm);
+	kvm_rmap_unmap_hva(kvm, address);
+}
+
+void kvm_mmu_notifier_invalidate_range(struct mmu_notifier *mn,
+				       struct mm_struct *mm,
+				       unsigned long start, unsigned long end)
+{
+	for (; start < end; start += PAGE_SIZE)
+		kvm_mmu_notifier_invalidate_page(mn, mm, start);
+}
+
+static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
+	.invalidate_range	= kvm_mmu_notifier_invalidate_range,
+	.invalidate_page	= kvm_mmu_notifier_invalidate_page,
+};
+
 struct  kvm *kvm_arch_create_vm(void)
 {
 	struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
@@ -3167,6 +3194,7 @@ struct  kvm *kvm_arch_create_vm(void)
 		return ERR_PTR(-ENOMEM);
 
 	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
+	kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops;
 
 	return kvm;
 }
@@ -3219,14 +3247,20 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
 	 */
 	if (!user_alloc) {
 		if (npages && !old.rmap) {
-			memslot->userspace_addr = do_mmap(NULL, 0,
-						     npages * PAGE_SIZE,
-						     PROT_READ | PROT_WRITE,
-						     MAP_SHARED | MAP_ANONYMOUS,
-						     0);
-
-			if (IS_ERR((void *)memslot->userspace_addr))
-				return PTR_ERR((void *)memslot->userspace_addr);
+			unsigned long userspace_addr;
+			
+			userspace_addr = do_mmap(NULL, 0,
+						 npages * PAGE_SIZE,
+						 PROT_READ | PROT_WRITE,
+						 MAP_SHARED | MAP_ANONYMOUS,
+						 0);
+			if (IS_ERR((void *)userspace_addr))
+				return PTR_ERR((void *)userspace_addr);
+
+			/* set userspace_addr atomically for kvm_hva_to_rmapp */
+			spin_lock(&kvm->mmu_lock);
+			memslot->userspace_addr = userspace_addr;
+			spin_unlock(&kvm->mmu_lock);
 		} else {
 			if (!old.user_alloc && old.rmap) {
 				int ret;
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index d6db0de..522028b 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -404,6 +404,7 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu);
 int kvm_mmu_setup(struct kvm_vcpu *vcpu);
 void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte);
 
+void kvm_rmap_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
 void kvm_mmu_zap_all(struct kvm *kvm);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 2714068..eae8734 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -117,6 +117,7 @@ struct kvm {
 	struct kvm_io_bus pio_bus;
 	struct kvm_vm_stat stat;
 	struct kvm_arch arch;
+	struct mmu_notifier mmu_notifier;
 };
 
 /* The guest did something we don't support. */
@@ -163,6 +164,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
 				struct kvm_memory_slot old,
 				int user_alloc);
 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn);
+unsigned long *kvm_hva_to_rmapp(struct kvm *kvm, unsigned long addr);
 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
 void kvm_release_page_clean(struct page *page);
 void kvm_release_page_dirty(struct page *page);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4295623..a67e38f 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -165,6 +165,7 @@ static struct kvm *kvm_create_vm(void)
 
 	kvm->mm = current->mm;
 	atomic_inc(&kvm->mm->mm_count);
+	mmu_notifier_register(&kvm->mmu_notifier, kvm->mm);
 	spin_lock_init(&kvm->mmu_lock);
 	kvm_io_bus_init(&kvm->pio_bus);
 	mutex_init(&kvm->lock);
@@ -298,7 +299,15 @@ int __kvm_set_memory_region(struct kvm *kvm,
 		memset(new.rmap, 0, npages * sizeof(*new.rmap));
 
 		new.user_alloc = user_alloc;
-		new.userspace_addr = mem->userspace_addr;
+		/*
+		 * hva_to_rmmap() serialzies with the mmu_lock and to be
+		 * safe it has to ignore memslots with !user_alloc &&
+		 * !userspace_addr.
+		 */
+		if (user_alloc)
+			new.userspace_addr = mem->userspace_addr;
+		else
+			new.userspace_addr = 0;
 	}
 
 	/* Allocate page dirty bitmap if needed */
@@ -311,14 +320,18 @@ int __kvm_set_memory_region(struct kvm *kvm,
 		memset(new.dirty_bitmap, 0, dirty_bytes);
 	}
 
+	spin_lock(&kvm->mmu_lock);
 	if (mem->slot >= kvm->nmemslots)
 		kvm->nmemslots = mem->slot + 1;
 
 	*memslot = new;
+	spin_unlock(&kvm->mmu_lock);
 
 	r = kvm_arch_set_memory_region(kvm, mem, old, user_alloc);
 	if (r) {
+		spin_lock(&kvm->mmu_lock);
 		*memslot = old;
+		spin_unlock(&kvm->mmu_lock);
 		goto out_free;
 	}
 
@@ -454,6 +467,28 @@ static unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
 	return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
 }
 
+/* if mmap_sem isn't taken, it can be safely called with only the mmu_lock */
+unsigned long *kvm_hva_to_rmapp(struct kvm *kvm, unsigned long addr)
+{
+	int i;
+
+	for (i = 0; i < kvm->nmemslots; i++) {
+		struct kvm_memory_slot *memslot = &kvm->memslots[i];
+		unsigned long start = memslot->userspace_addr;
+		unsigned long end = start + (memslot->npages << PAGE_SHIFT);
+
+		/* mmu_lock protects userspace_addr */
+		if (!start)
+			continue;
+
+		if (addr >= start && addr < end) {
+			gfn_t gfn_offset = (addr - start) >> PAGE_SHIFT;
+			return &memslot->rmap[gfn_offset];
+		}
+	}
+	return NULL;
+}
+
 /*
  * Requires current->mm->mmap_sem to be held
  */

-------------------------------------------------------------------------
Check out the new SourceForge.net Marketplace.
It's the best place to buy or sell services for
just about anything Open Source.
http://ad.doubleclick.net/clk;164216239;13503038;w?http://sf.net/marketplace

  parent reply	other threads:[~2008-01-14 17:44 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-01-13 13:32 KVM swapping with mmu notifiers Andrea Arcangeli
     [not found] ` <20080113133244.GC8736-lysg2Xt5kKMAvxtiuMwx3w@public.gmane.org>
2008-01-13 15:02   ` Anthony Liguori
2008-01-14 13:45   ` Marcelo Tosatti
2008-01-14 14:06     ` Andrea Arcangeli
2008-01-14 14:09     ` Avi Kivity
     [not found]       ` <478B6CFF.9070801-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2008-01-14 14:24         ` Andrea Arcangeli
     [not found]           ` <20080114142457.GF7062-lysg2Xt5kKMAvxtiuMwx3w@public.gmane.org>
2008-01-14 15:43             ` Avi Kivity
     [not found]               ` <478B833E.1020801-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2008-01-14 17:44                 ` Andrea Arcangeli [this message]
     [not found]                   ` <20080114174447.GA30812-lysg2Xt5kKMAvxtiuMwx3w@public.gmane.org>
2008-01-15 14:40                     ` Avi Kivity
     [not found]                       ` <478CC5D3.2040201-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2008-01-15 15:52                         ` Andrea Arcangeli
     [not found]                           ` <20080115155253.GA7059-lysg2Xt5kKMAvxtiuMwx3w@public.gmane.org>
2008-01-15 15:57                             ` Avi Kivity
     [not found]                               ` <478CD7CF.3080603-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2008-01-15 16:09                                 ` Andrea Arcangeli
     [not found]                                   ` <20080115160936.GC7059-lysg2Xt5kKMAvxtiuMwx3w@public.gmane.org>
2008-01-20 15:16                                     ` Avi Kivity
     [not found]                                       ` <479365B3.3000600-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2008-01-21 11:37                                         ` Andrea Arcangeli
     [not found]                                           ` <20080121113715.GE6970-lysg2Xt5kKMAvxtiuMwx3w@public.gmane.org>
2008-01-21 12:53                                             ` Avi Kivity
2008-01-22 13:37                                             ` Avi Kivity
     [not found]                                               ` <4795F1B7.9050604-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2008-01-22 14:56                                                 ` Andrea Arcangeli
     [not found]                                                   ` <20080122145631.GG7331-lysg2Xt5kKMAvxtiuMwx3w@public.gmane.org>
2008-01-22 16:17                                                     ` Avi Kivity
     [not found]                                                       ` <47961722.2010804-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2008-01-22 17:18                                                         ` Andrea Arcangeli
     [not found]                                                           ` <20080122171806.GH7331-lysg2Xt5kKMAvxtiuMwx3w@public.gmane.org>
2008-01-22 20:03                                                             ` Andrea Arcangeli

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080114174447.GA30812@v2.random \
    --to=andrea-atkuwr5tajbwk0htik3j/w@public.gmane.org \
    --cc=avi-atKUWr5tajBWk0Htik3J/w@public.gmane.org \
    --cc=kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org \
    --cc=marcelo-Bw31MaZKKs3YtjvyW6yDsg@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.