From: Andrea Arcangeli <andrea-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
To: Avi Kivity <avi-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
Cc: Marcelo Tosatti <marcelo-Bw31MaZKKs3YtjvyW6yDsg@public.gmane.org>,
kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org
Subject: Re: KVM swapping with mmu notifiers
Date: Tue, 22 Jan 2008 21:03:52 +0100 [thread overview]
Message-ID: <20080122200352.GA15848@v2.random> (raw)
In-Reply-To: <20080122171806.GH7331-lysg2Xt5kKMAvxtiuMwx3w@public.gmane.org>
This last update works against mmu-notifiers #v4. It makes the accessed
bit in the spte visible to the Linux VM, so it provides accurate
working-set detection without requiring vmexits.
Signed-off-by: Andrea Arcangeli <andrea-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 4086080..c527d7d 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -18,6 +18,7 @@ config KVM
tristate "Kernel-based Virtual Machine (KVM) support"
depends on ARCH_SUPPORTS_KVM && EXPERIMENTAL
select PREEMPT_NOTIFIERS
+ select MMU_NOTIFIER
select ANON_INODES
---help---
Support hosting fully virtualized guest machines using hardware
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index c85b904..adb20de 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -532,6 +532,110 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
kvm_flush_remote_tlbs(kvm);
}
+/*
+ * Tear down a single shadow pte: remove it from the reverse map, replace
+ * it with the non-present trap pte and flush the remote TLBs.
+ *
+ * @kvm:  VM that owns the shadow page tables.
+ * @spte: shadow pte to unmap; its current value supplies the page frame.
+ *
+ * A temporary reference is held on the backing page from before
+ * rmap_remove() until after the TLB flush.
+ * NOTE(review): presumably this keeps the page from being recycled while
+ * a vcpu may still hold a stale translation; confirm against the
+ * reference that rmap_remove() drops.
+ */
+static void kvm_unmap_spte(struct kvm *kvm, u64 *spte)
+{
+	struct page *page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
+
+	get_page(page);
+	rmap_remove(kvm, spte);
+	set_shadow_pte(spte, shadow_trap_nonpresent_pte);
+	kvm_flush_remote_tlbs(kvm);
+	/*
+	 * Balance get_page() with put_page() rather than __free_page():
+	 * if this is the last reference, put_page() goes through the
+	 * regular release path (LRU and compound pages) instead of
+	 * handing the page straight back to the allocator.
+	 */
+	put_page(page);
+}
+
+/*
+ * Unmap every shadow pte chained on one reverse-map head.
+ *
+ * @kvm:   VM that owns the shadow page tables.
+ * @rmapp: reverse-map head for one guest frame.
+ *
+ * The walk restarts from the head of the chain after each removal
+ * instead of caching the successor.  kvm_unmap_spte() calls
+ * rmap_remove(), which may move the remaining entries around, so a
+ * cached "next" cursor can step past entries and leave them mapped.
+ * Each iteration removes exactly one entry, so the loop ends once the
+ * chain is empty.
+ */
+static void kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
+{
+	u64 *spte;
+
+	while ((spte = rmap_next(kvm, rmapp, NULL))) {
+		BUG_ON(!(*spte & PT_PRESENT_MASK));
+		rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte);
+		kvm_unmap_spte(kvm, spte);
+	}
+}
+
+/*
+ * Unmap all shadow ptes that map the host virtual address @hva.
+ *
+ * For every memslot whose userspace range contains @hva, the rmap chain
+ * of the corresponding page in that slot is torn down.  The whole walk
+ * runs under kvm->mmu_lock.
+ */
+void kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
+{
+	int slot;
+
+	/*
+	 * If mmap_sem isn't taken, we can walk the memslots with only
+	 * the mmu_lock by skipping over the slots with userspace_addr == 0.
+	 */
+	spin_lock(&kvm->mmu_lock);
+	for (slot = 0; slot < kvm->nmemslots; slot++) {
+		struct kvm_memory_slot *ms = &kvm->memslots[slot];
+		unsigned long base = ms->userspace_addr;
+		unsigned long limit;
+		gfn_t idx;
+
+		/* mmu_lock protects userspace_addr */
+		if (!base)
+			continue;
+
+		limit = base + (ms->npages << PAGE_SHIFT);
+		if (hva < base || hva >= limit)
+			continue;
+
+		/* Page index of @hva inside this slot selects the rmap head. */
+		idx = (hva - base) >> PAGE_SHIFT;
+		kvm_unmap_rmapp(kvm, &ms->rmap[idx]);
+	}
+	spin_unlock(&kvm->mmu_lock);
+}
+
+/*
+ * Test and clear the accessed bit of every shadow pte on one rmap chain.
+ *
+ * Returns 1 if at least one spte had PT_ACCESSED_MASK set (it is cleared
+ * in that case), 0 otherwise.
+ */
+static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp)
+{
+	u64 *sptep;
+	int referenced = 0;
+
+	for (sptep = rmap_next(kvm, rmapp, NULL); sptep;
+	     sptep = rmap_next(kvm, rmapp, sptep)) {
+		u64 val = *sptep;
+		int accessed;
+
+		BUG_ON(!(val & PT_PRESENT_MASK));
+		accessed = val & PT_ACCESSED_MASK;
+		if (!accessed)
+			continue;
+
+		referenced = 1;
+		/* Write the snapshot back with only the accessed bit dropped. */
+		set_shadow_pte(sptep, val & ~PT_ACCESSED_MASK);
+	}
+	return referenced;
+}
+
+/*
+ * Age the shadow ptes that map the host virtual address @hva.
+ *
+ * Clears the accessed bit on every matching spte under kvm->mmu_lock and,
+ * if any of them was set, flushes the remote TLBs after the lock is
+ * dropped.  Returns nonzero if the page was found referenced.
+ */
+int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+{
+	int slot;
+	int referenced = 0;
+
+	/*
+	 * If mmap_sem isn't taken, we can walk the memslots with only
+	 * the mmu_lock by skipping over the slots with userspace_addr == 0.
+	 */
+	spin_lock(&kvm->mmu_lock);
+	for (slot = 0; slot < kvm->nmemslots; slot++) {
+		struct kvm_memory_slot *ms = &kvm->memslots[slot];
+		unsigned long base = ms->userspace_addr;
+		unsigned long limit;
+		gfn_t idx;
+
+		/* mmu_lock protects userspace_addr */
+		if (!base)
+			continue;
+
+		limit = base + (ms->npages << PAGE_SHIFT);
+		if (hva < base || hva >= limit)
+			continue;
+
+		/* Page index of @hva inside this slot selects the rmap head. */
+		idx = (hva - base) >> PAGE_SHIFT;
+		referenced |= kvm_age_rmapp(kvm, &ms->rmap[idx]);
+	}
+	spin_unlock(&kvm->mmu_lock);
+
+	if (referenced)
+		kvm_flush_remote_tlbs(kvm);
+
+	return referenced;
+}
+
#ifdef MMU_DEBUG
static int is_empty_shadow_page(u64 *spt)
{
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8f94a0b..35bb114 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3167,6 +3167,43 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
free_page((unsigned long)vcpu->arch.pio_data);
}
+/* Map an embedded mmu_notifier back to the struct kvm that contains it. */
+static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn)
+{
+	struct kvm *owner = container_of(mn, struct kvm, mmu_notifier);
+
+	return owner;
+}
+
+/*
+ * mmu notifier callback: drop the shadow mappings of one host virtual
+ * address.  The notifier must belong to the mm the VM was created in.
+ */
+void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
+				      struct mm_struct *mm,
+				      unsigned long address)
+{
+	struct kvm *vm = mmu_notifier_to_kvm(mn);
+
+	BUG_ON(vm->mm != mm);
+	kvm_unmap_hva(vm, address);
+}
+
+/*
+ * mmu notifier callback: invalidate [start, end) by invalidating one
+ * PAGE_SIZE step at a time.
+ */
+void kvm_mmu_notifier_invalidate_range(struct mmu_notifier *mn,
+				       struct mm_struct *mm,
+				       unsigned long start, unsigned long end)
+{
+	unsigned long addr = start;
+
+	while (addr < end) {
+		kvm_mmu_notifier_invalidate_page(mn, mm, addr);
+		addr += PAGE_SIZE;
+	}
+}
+
+/*
+ * mmu notifier callback: report (and clear) the accessed state of the
+ * shadow mappings of one host virtual address.  Returns the result of
+ * kvm_age_hva().
+ */
+int kvm_mmu_notifier_age_page(struct mmu_notifier *mn,
+			      struct mm_struct *mm,
+			      unsigned long address)
+{
+	struct kvm *vm = mmu_notifier_to_kvm(mn);
+
+	BUG_ON(vm->mm != mm);
+	return kvm_age_hva(vm, address);
+}
+
+/* Callback table installed in each VM's embedded mmu_notifier. */
+static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
+	.invalidate_page	= kvm_mmu_notifier_invalidate_page,
+	.invalidate_range	= kvm_mmu_notifier_invalidate_range,
+	.age_page		= kvm_mmu_notifier_age_page,
+};
+
struct kvm *kvm_arch_create_vm(void)
{
struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
@@ -3175,6 +3212,7 @@ struct kvm *kvm_arch_create_vm(void)
return ERR_PTR(-ENOMEM);
INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
+ kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops;
return kvm;
}
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index d6db0de..18496e0 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -404,6 +404,8 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu);
int kvm_mmu_setup(struct kvm_vcpu *vcpu);
void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte);
+void kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
+int kvm_age_hva(struct kvm *kvm, unsigned long hva);
int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
void kvm_mmu_zap_all(struct kvm *kvm);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ea4764b..51c9bb8 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -118,6 +118,7 @@ struct kvm {
struct kvm_io_bus pio_bus;
struct kvm_vm_stat stat;
struct kvm_arch arch;
+ struct mmu_notifier mmu_notifier;
};
/* The guest did something we don't support. */
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4295623..b5454d1 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -165,6 +165,7 @@ static struct kvm *kvm_create_vm(void)
kvm->mm = current->mm;
atomic_inc(&kvm->mm->mm_count);
+ mmu_notifier_register(&kvm->mmu_notifier, kvm->mm);
spin_lock_init(&kvm->mmu_lock);
kvm_io_bus_init(&kvm->pio_bus);
mutex_init(&kvm->lock);
-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
prev parent reply other threads:[~2008-01-22 20:03 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-01-13 13:32 KVM swapping with mmu notifiers Andrea Arcangeli
[not found] ` <20080113133244.GC8736-lysg2Xt5kKMAvxtiuMwx3w@public.gmane.org>
2008-01-13 15:02 ` Anthony Liguori
2008-01-14 13:45 ` Marcelo Tosatti
2008-01-14 14:06 ` Andrea Arcangeli
2008-01-14 14:09 ` Avi Kivity
[not found] ` <478B6CFF.9070801-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2008-01-14 14:24 ` Andrea Arcangeli
[not found] ` <20080114142457.GF7062-lysg2Xt5kKMAvxtiuMwx3w@public.gmane.org>
2008-01-14 15:43 ` Avi Kivity
[not found] ` <478B833E.1020801-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2008-01-14 17:44 ` Andrea Arcangeli
[not found] ` <20080114174447.GA30812-lysg2Xt5kKMAvxtiuMwx3w@public.gmane.org>
2008-01-15 14:40 ` Avi Kivity
[not found] ` <478CC5D3.2040201-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2008-01-15 15:52 ` Andrea Arcangeli
[not found] ` <20080115155253.GA7059-lysg2Xt5kKMAvxtiuMwx3w@public.gmane.org>
2008-01-15 15:57 ` Avi Kivity
[not found] ` <478CD7CF.3080603-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2008-01-15 16:09 ` Andrea Arcangeli
[not found] ` <20080115160936.GC7059-lysg2Xt5kKMAvxtiuMwx3w@public.gmane.org>
2008-01-20 15:16 ` Avi Kivity
[not found] ` <479365B3.3000600-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2008-01-21 11:37 ` Andrea Arcangeli
[not found] ` <20080121113715.GE6970-lysg2Xt5kKMAvxtiuMwx3w@public.gmane.org>
2008-01-21 12:53 ` Avi Kivity
2008-01-22 13:37 ` Avi Kivity
[not found] ` <4795F1B7.9050604-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2008-01-22 14:56 ` Andrea Arcangeli
[not found] ` <20080122145631.GG7331-lysg2Xt5kKMAvxtiuMwx3w@public.gmane.org>
2008-01-22 16:17 ` Avi Kivity
[not found] ` <47961722.2010804-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2008-01-22 17:18 ` Andrea Arcangeli
[not found] ` <20080122171806.GH7331-lysg2Xt5kKMAvxtiuMwx3w@public.gmane.org>
2008-01-22 20:03 ` Andrea Arcangeli [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20080122200352.GA15848@v2.random \
--to=andrea-atkuwr5tajbwk0htik3j/w@public.gmane.org \
--cc=avi-atKUWr5tajBWk0Htik3J/w@public.gmane.org \
--cc=kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org \
--cc=marcelo-Bw31MaZKKs3YtjvyW6yDsg@public.gmane.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.