From: Marcelo Tosatti <marcelo-Bw31MaZKKs3YtjvyW6yDsg@public.gmane.org>
To: Andrea Arcangeli <andrea-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org
Subject: Re: KVM swapping with mmu notifiers
Date: Mon, 14 Jan 2008 11:45:39 -0200 [thread overview]
Message-ID: <20080114134539.GB15200@dmt> (raw)
In-Reply-To: <20080113133244.GC8736-lysg2Xt5kKMAvxtiuMwx3w@public.gmane.org>
Hi Andrea,
> path for kvm so I guess not worth optimizing in the short/mid term,
> but by optimizing the invalidate_page case we may halve the number of
> tlb flushes for some common case. I leave it for later, the swapping
> is heavily I/O bound anyway so a some more ipi in smp host shouldn't
> be very measurable (on UP host it makes no difference to flush
> multiple times in practice).
>
> Signed-off-by: Andrea Arcangeli <andrea-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
>
> diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
> index 4086080..c527d7d 100644
> --- a/arch/x86/kvm/Kconfig
> +++ b/arch/x86/kvm/Kconfig
> @@ -18,6 +18,7 @@ config KVM
> tristate "Kernel-based Virtual Machine (KVM) support"
> depends on ARCH_SUPPORTS_KVM && EXPERIMENTAL
> select PREEMPT_NOTIFIERS
> + select MMU_NOTIFIER
> select ANON_INODES
> ---help---
> Support hosting fully virtualized guest machines using hardware
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 324ff9a..103c270 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -532,6 +532,36 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
> kvm_flush_remote_tlbs(kvm);
> }
>
> +static void unmap_spte(struct kvm *kvm, u64 *spte)
> +{
> + struct page *page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
> + get_page(page);
> + rmap_remove(kvm, spte);
> + set_shadow_pte(spte, shadow_trap_nonpresent_pte);
> + kvm_flush_remote_tlbs(kvm);
> + __free_page(page);
> +}
> +
> +void kvm_rmap_unmap_gfn(struct kvm *kvm, gfn_t gfn)
> +{
> + unsigned long *rmapp;
> + u64 *spte, *curr_spte;
> +
> + spin_lock(&kvm->mmu_lock);
> + gfn = unalias_gfn(kvm, gfn);
> + rmapp = gfn_to_rmap(kvm, gfn);
The alias and memslot maps are protected only by mmap_sem, so you
should make kvm_set_memory_region/set_memory_alias grab the mmu spinlock
in addition to mmap_sem in write mode.
kvm_mmu_zap_all() grabs the mmu lock.. that should probably move up into
the caller.
> +
> + spte = rmap_next(kvm, rmapp, NULL);
> + while (spte) {
> + BUG_ON(!(*spte & PT_PRESENT_MASK));
> + rmap_printk("rmap_swap_page: spte %p %llx\n", spte, *spte);
> + curr_spte = spte;
> + spte = rmap_next(kvm, rmapp, spte);
> + unmap_spte(kvm, curr_spte);
> + }
> + spin_unlock(&kvm->mmu_lock);
> +}
> +
> #ifdef MMU_DEBUG
> static int is_empty_shadow_page(u64 *spt)
> {
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 8a90403..e9a3f6e 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -3159,6 +3159,36 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
> free_page((unsigned long)vcpu->arch.pio_data);
> }
>
> +static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn)
> +{
> + return container_of(mn, struct kvm, mmu_notifier);
> +}
> +
> +void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
> + struct mm_struct *mm,
> + unsigned long address)
> +{
> + struct kvm *kvm = mmu_notifier_to_kvm(mn);
> + gfn_t gfn = hva_to_gfn(kvm, address);
> + BUG_ON(mm != kvm->mm);
> + if (gfn == -1UL)
> + return;
> + kvm_rmap_unmap_gfn(kvm, gfn);
And then you also need to cover "hva_to_gfn()" to happen under the lock.
> +}
> +
> +void kvm_mmu_notifier_invalidate_range(struct mmu_notifier *mn,
> + struct mm_struct *mm,
> + unsigned long start, unsigned long end)
> +{
> + for (; start < end; start += PAGE_SIZE)
> + kvm_mmu_notifier_invalidate_page(mn, mm, start);
> +}
> +
> +static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
> + .invalidate_range = kvm_mmu_notifier_invalidate_range,
> + .invalidate_page = kvm_mmu_notifier_invalidate_page,
> +};
> +
> struct kvm *kvm_arch_create_vm(void)
> {
> struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
> @@ -3167,6 +3197,7 @@ struct kvm *kvm_arch_create_vm(void)
> return ERR_PTR(-ENOMEM);
>
> INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
> + kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops;
>
> return kvm;
> }
> diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
> index d6db0de..feacd77 100644
> --- a/include/asm-x86/kvm_host.h
> +++ b/include/asm-x86/kvm_host.h
> @@ -404,6 +404,7 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu);
> int kvm_mmu_setup(struct kvm_vcpu *vcpu);
> void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte);
>
> +void kvm_rmap_unmap_gfn(struct kvm *kvm, gfn_t gfn);
> int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
> void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
> void kvm_mmu_zap_all(struct kvm *kvm);
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 2714068..85da7fa 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -117,6 +117,7 @@ struct kvm {
> struct kvm_io_bus pio_bus;
> struct kvm_vm_stat stat;
> struct kvm_arch arch;
> + struct mmu_notifier mmu_notifier;
> };
>
> /* The guest did something we don't support. */
> @@ -163,6 +164,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
> struct kvm_memory_slot old,
> int user_alloc);
> gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn);
> +gfn_t hva_to_gfn(struct kvm *kvm, unsigned long addr);
> struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
> void kvm_release_page_clean(struct page *page);
> void kvm_release_page_dirty(struct page *page);
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 4295623..8f1dd86 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -165,6 +165,7 @@ static struct kvm *kvm_create_vm(void)
>
> kvm->mm = current->mm;
> atomic_inc(&kvm->mm->mm_count);
> + mmu_notifier_register(&kvm->mmu_notifier, kvm->mm);
> spin_lock_init(&kvm->mmu_lock);
> kvm_io_bus_init(&kvm->pio_bus);
> mutex_init(&kvm->lock);
> @@ -454,6 +455,23 @@ static unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
> return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
> }
>
> +gfn_t hva_to_gfn(struct kvm *kvm, unsigned long addr)
> +{
> + int i;
> +
> + for (i = 0; i < kvm->nmemslots; i++) {
> + struct kvm_memory_slot *memslot = &kvm->memslots[i];
> + unsigned long start = memslot->userspace_addr;
> + unsigned long end = start + (memslot->npages << PAGE_SHIFT);
> +
> + if (addr >= start && addr < end) {
> + gfn_t gfn_offset = (addr - start) >> PAGE_SHIFT;
> + return memslot->base_gfn + gfn_offset;
> + }
> + }
> + return -1UL;
> +}
> +
> /*
> * Requires current->mm->mmap_sem to be held
> */
>
>
>
> And here a compatibility patch to kvm-userland so the external module
> still compile and runs with older kernels w/o MMU_NOTIFIER patch applied.
>
> Signed-off-by: Andrea Arcangeli <andrea-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
>
> diff --git a/kernel/external-module-compat.h b/kernel/external-module-compat.h
> index 67b9cc4..34ef0a5 100644
> --- a/kernel/external-module-compat.h
> +++ b/kernel/external-module-compat.h
> @@ -17,6 +17,28 @@
> #include <linux/hrtimer.h>
> #include <asm/bitops.h>
>
> +#ifndef CONFIG_MMU_NOTIFIER
> +struct mmu_notifier;
> +
> +struct mmu_notifier_ops {
> + void (*release)(struct mmu_notifier * mn,
> + struct mm_struct *mm);
> + void (*invalidate_page)(struct mmu_notifier * mn,
> + struct mm_struct *mm,
> + unsigned long address);
> + void (*invalidate_range)(struct mmu_notifier * mn,
> + struct mm_struct *mm,
> + unsigned long start, unsigned long end);
> +};
> +
> +struct mmu_notifier {
> + const struct mmu_notifier_ops *ops;
> +};
> +#define mmu_notifier_register(mn, mm) do {} while(0)
> +#define mmu_notifier_unregister(mn) do {} while (0)
> +#define mmu_notifier_release(mm) do {} while (0)
> +#endif
> +
> /*
> * 2.6.16 does not have GFP_NOWAIT
> */
>
>
> Here another patch for kvm-userland where I can't see symmetry between
> the lack of atomic_inc despite mmdrop is still run. I can't possibly
> see how this is supposedly not required when compiled into the kernel
> vs external module. Either atomic_inc is needed in both or none. Even
> if I'm right still this bug wasn't destabilizing because
> atomic_inc_and_dec only fires once in the overflow check, so it
> shouldn't matter to run one mmdrop more than needed, but it's good for
> correctness.
>
> Signed-off-by: Andrea Arcangeli <andrea-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
>
> diff --git a/kernel/hack-module.awk b/kernel/hack-module.awk
> index 5187c96..884bc50 100644
> --- a/kernel/hack-module.awk
> +++ b/kernel/hack-module.awk
> @@ -33,8 +33,6 @@
> vmx_load_host_state = 0
> }
>
> -/atomic_inc\(&kvm->mm->mm_count\);/ { $0 = "//" $0 }
> -
> /^\t\.fault = / {
> fcn = gensub(/,/, "", "g", $3)
> $0 = "\t.VMA_OPS_FAULT(fault) = VMA_OPS_FAULT_FUNC(" fcn "),"
>
>
> I'll post the mmu-notifiers patch (required in the host kernel to run
> the above) separately in CC with more mailing lists because that's not
> KVM code at all and we hope to get it merged in the mainline kernel
> soon after getting feedback on the interface from the other users of
> the mmu notifiers.
>
> Thanks!
> Andrea
>
> -------------------------------------------------------------------------
> Check out the new SourceForge.net Marketplace.
> It's the best place to buy or sell services for
> just about anything Open Source.
> http://ad.doubleclick.net/clk;164216239;13503038;w?http://sf.net/marketplace
> _______________________________________________
> kvm-devel mailing list
> kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org
> https://lists.sourceforge.net/lists/listinfo/kvm-devel
-------------------------------------------------------------------------
Check out the new SourceForge.net Marketplace.
It's the best place to buy or sell services for
just about anything Open Source.
http://ad.doubleclick.net/clk;164216239;13503038;w?http://sf.net/marketplace
next prev parent reply other threads:[~2008-01-14 13:45 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-01-13 13:32 KVM swapping with mmu notifiers Andrea Arcangeli
[not found] ` <20080113133244.GC8736-lysg2Xt5kKMAvxtiuMwx3w@public.gmane.org>
2008-01-13 15:02 ` Anthony Liguori
2008-01-14 13:45 ` Marcelo Tosatti [this message]
2008-01-14 14:06 ` Andrea Arcangeli
2008-01-14 14:09 ` Avi Kivity
[not found] ` <478B6CFF.9070801-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2008-01-14 14:24 ` Andrea Arcangeli
[not found] ` <20080114142457.GF7062-lysg2Xt5kKMAvxtiuMwx3w@public.gmane.org>
2008-01-14 15:43 ` Avi Kivity
[not found] ` <478B833E.1020801-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2008-01-14 17:44 ` Andrea Arcangeli
[not found] ` <20080114174447.GA30812-lysg2Xt5kKMAvxtiuMwx3w@public.gmane.org>
2008-01-15 14:40 ` Avi Kivity
[not found] ` <478CC5D3.2040201-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2008-01-15 15:52 ` Andrea Arcangeli
[not found] ` <20080115155253.GA7059-lysg2Xt5kKMAvxtiuMwx3w@public.gmane.org>
2008-01-15 15:57 ` Avi Kivity
[not found] ` <478CD7CF.3080603-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2008-01-15 16:09 ` Andrea Arcangeli
[not found] ` <20080115160936.GC7059-lysg2Xt5kKMAvxtiuMwx3w@public.gmane.org>
2008-01-20 15:16 ` Avi Kivity
[not found] ` <479365B3.3000600-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2008-01-21 11:37 ` Andrea Arcangeli
[not found] ` <20080121113715.GE6970-lysg2Xt5kKMAvxtiuMwx3w@public.gmane.org>
2008-01-21 12:53 ` Avi Kivity
2008-01-22 13:37 ` Avi Kivity
[not found] ` <4795F1B7.9050604-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2008-01-22 14:56 ` Andrea Arcangeli
[not found] ` <20080122145631.GG7331-lysg2Xt5kKMAvxtiuMwx3w@public.gmane.org>
2008-01-22 16:17 ` Avi Kivity
[not found] ` <47961722.2010804-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2008-01-22 17:18 ` Andrea Arcangeli
[not found] ` <20080122171806.GH7331-lysg2Xt5kKMAvxtiuMwx3w@public.gmane.org>
2008-01-22 20:03 ` Andrea Arcangeli
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20080114134539.GB15200@dmt \
--to=marcelo-bw31mazkks3ytjvyw6ydsg@public.gmane.org \
--cc=andrea-atKUWr5tajBWk0Htik3J/w@public.gmane.org \
--cc=kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox