From: Avi Kivity <avi@redhat.com>
To: Alex Williamson <alex.williamson@redhat.com>
Cc: linux-kernel@vger.kernel.org, kvm@vger.kernel.org,
mtosatti@redhat.com, xiaoguangrong@cn.fujitsu.com
Subject: Re: [RFC PATCH 2/3] kvm: Allow memory slot array to grow on demand
Date: Thu, 24 Feb 2011 12:39:14 +0200
Message-ID: <4D663552.7070105@redhat.com>
In-Reply-To: <20110222185512.22026.88579.stgit@s20.home>
On 02/22/2011 08:55 PM, Alex Williamson wrote:
> Remove fixed KVM_MEMORY_SLOTS limit, allowing the slot array
> to grow on demand. Private slots are now allocated at the
> front instead of the end. Only x86 seems to use private slots,
> so this is now zero for all other archs. The memslots pointer
> is already updated using rcu, so changing the size of the
> array when it's replaced is straightforward. x86 also keeps
> a bitmap of slots used by a kvm_mmu_page, which requires a
> shadow tlb flush whenever we increase the number of slots.
> This forces the pages to be rebuilt with the new bitmap size.
>
>
>
> #define KVM_PIO_PAGE_OFFSET 1
> #define KVM_COALESCED_MMIO_PAGE_OFFSET 2
> @@ -207,7 +206,7 @@ struct kvm_mmu_page {
> * One bit set per slot which has memory
> * in this shadow page.
> */
> - DECLARE_BITMAP(slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
> + unsigned long *slot_bitmap;
What about
        union {
                DECLARE_BITMAP(direct_slot_bitmap, BITS_PER_LONG);
                unsigned long *indirect_slot_bitmap;
        };
to make the hackery below more explicit?
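With the union, the allocation path below could spell out which member it
is using instead of casting through &sp->slot_bitmap, roughly (a sketch
only):

        if (unlikely(slots->nmemslots > BITS_PER_LONG)) {
                sp->indirect_slot_bitmap =
                        kzalloc(BITS_TO_LONGS(slots->nmemslots) *
                                sizeof(long), GFP_KERNEL);
                ...
        } else
                bitmap_zero(sp->direct_slot_bitmap, slots->nmemslots);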
>
> static void kvm_mmu_free_page(struct kvm *kvm, struct kvm_mmu_page *sp)
> {
> + struct kvm_memslots *slots = kvm_memslots(kvm);
> +
> ASSERT(is_empty_shadow_page(sp->spt));
> hlist_del(&sp->hash_link);
> list_del(&sp->link);
> + if (unlikely(slots->nmemslots > sizeof(sp->slot_bitmap) * 8))
> + kfree(sp->slot_bitmap);
> __free_page(virt_to_page(sp->spt));
> if (!sp->role.direct)
> __free_page(virt_to_page(sp->gfns));
> @@ -1048,6 +1052,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
> u64 *parent_pte, int direct)
> {
> struct kvm_mmu_page *sp;
> + struct kvm_memslots *slots = kvm_memslots(vcpu->kvm);
>
> sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache, sizeof *sp);
> sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE);
> @@ -1056,7 +1061,16 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
> PAGE_SIZE);
> set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
> list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
> - bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
> +
> + if (unlikely(slots->nmemslots > sizeof(sp->slot_bitmap) * 8)) {
> + sp->slot_bitmap = kzalloc(sizeof(long) *
> + BITS_TO_LONGS(slots->nmemslots),
> + GFP_KERNEL);
> + if (!sp->slot_bitmap)
> + return NULL;
We don't support failing kvm_mmu_get_page(). See
mmu_memory_cache_alloc() and mmu_topup_memory_caches().
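One way to keep it infallible would be to reserve the indirect bitmap from
a path that is still allowed to return -ENOMEM, in the same spirit as the
existing cache top-up. A very rough sketch (the per-vcpu field and the
function name are made up for illustration):

        /*
         * Illustration only: reserve a large-enough bitmap before taking
         * mmu_lock, so kvm_mmu_alloc_page() never has to allocate (and
         * thus never fails).
         */
        static int mmu_topup_slot_bitmap(struct kvm_vcpu *vcpu)
        {
                struct kvm_memslots *slots = kvm_memslots(vcpu->kvm);

                if (slots->nmemslots <= BITS_PER_LONG)
                        return 0;       /* inline bitmap suffices */

                if (!vcpu->arch.cached_slot_bitmap)
                        vcpu->arch.cached_slot_bitmap =
                                kzalloc(BITS_TO_LONGS(slots->nmemslots) *
                                        sizeof(long), GFP_KERNEL);

                return vcpu->arch.cached_slot_bitmap ? 0 : -ENOMEM;
        }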
> + } else
> + bitmap_zero((void *)&sp->slot_bitmap, slots->nmemslots);
> +
>
>
> static void mmu_convert_notrap(struct kvm_mmu_page *sp)
> @@ -3530,13 +3548,19 @@ int kvm_mmu_setup(struct kvm_vcpu *vcpu)
> void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
> {
> struct kvm_mmu_page *sp;
> + struct kvm_memslots *slots = kvm_memslots(kvm);
>
> list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link) {
> int i;
> u64 *pt;
>
> - if (!test_bit(slot, sp->slot_bitmap))
> - continue;
> + if (likely(slots->nmemslots <= sizeof(sp->slot_bitmap) * 8)) {
> + if (!test_bit(slot, (void *)&sp->slot_bitmap))
> + continue;
> + } else {
> + if (!test_bit(slot, sp->slot_bitmap))
> + continue;
> + }
That likely() would fail 100% for certain guests.
Neater to write

        slot_bitmap = sp_slot_bitmap(sp);
        if (!test_bit(slot, slot_bitmap))
                continue;
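with the direct/indirect decision made in exactly one place. A sketch of
such a helper (taking kvm as an extra argument here, since the shadow page
itself does not yet record which case it is in; see also the note further
down about storing that):

        static unsigned long *sp_slot_bitmap(struct kvm *kvm,
                                             struct kvm_mmu_page *sp)
        {
                struct kvm_memslots *slots = kvm_memslots(kvm);

                if (slots->nmemslots <= BITS_PER_LONG)
                        return sp->direct_slot_bitmap;

                return sp->indirect_slot_bitmap;
        }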
> +
> +/*
> + * Protect from malicious userspace by putting an upper bound on the number
> + * of memory slots. This is an arbitrarily large number that still allows
> + * us to make pseudo-guarantees about supporting 64 assigned devices with
> + * plenty of slots left over.
> + */
> +#ifndef KVM_MAX_MEM_SLOTS
> + #define KVM_MAX_MEM_SLOTS 512
> +#endif
The increase should be in a separate patch (after we optimize the
search-fail case).
>
> if (!npages) {
> r = -ENOMEM;
> - slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
> +
> + nmemslots = (mem->slot >= kvm->memslots->nmemslots) ?
> + mem->slot + 1 : kvm->memslots->nmemslots;
> +
> + slots = kzalloc(sizeof(struct kvm_memslots) +
> + nmemslots * sizeof(struct kvm_memory_slot),
> + GFP_KERNEL);
> if (!slots)
> goto out_free;
> - memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
> - if (mem->slot >= slots->nmemslots)
> - slots->nmemslots = mem->slot + 1;
> + memcpy(slots, kvm->memslots,
> + sizeof(struct kvm_memslots) + kvm->memslots->nmemslots *
> + sizeof(struct kvm_memory_slot));
> + slots->nmemslots = nmemslots;
> slots->generation++;
> slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID;
>
> @@ -787,12 +797,21 @@ skip_lpage:
> }
>
> r = -ENOMEM;
> - slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
> +
> + if (mem->slot >= kvm->memslots->nmemslots) {
> + nmemslots = mem->slot + 1;
> + flush = true;
Isn't flush here a little too aggressive? Shouldn't we flush only if we
cross the BITS_PER_LONG threshold?
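i.e. only the transition from the inline bitmap to the separately
allocated one needs the shadow pages rebuilt, something like (a sketch,
reusing the patch's variables):

        if (mem->slot >= kvm->memslots->nmemslots) {
                nmemslots = mem->slot + 1;
                /* Rebuild shadow pages only when the bitmap
                 * representation changes, not on every growth. */
                if (kvm->memslots->nmemslots <= BITS_PER_LONG &&
                    nmemslots > BITS_PER_LONG)
                        flush = true;
        } else
                nmemslots = kvm->memslots->nmemslots;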
> + } else
> + nmemslots = kvm->memslots->nmemslots;
> +
> + slots = kzalloc(sizeof(struct kvm_memslots) +
> + nmemslots * sizeof(struct kvm_memory_slot),
> + GFP_KERNEL);
Code duplication -> helper.
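Both the !npages path above and this one do the same allocate-and-copy
dance; a small helper could carry it, e.g. (illustrative only, the name is
made up):

        static struct kvm_memslots *kvm_dup_memslots(struct kvm *kvm,
                                                     int nmemslots)
        {
                struct kvm_memslots *old = kvm->memslots;
                struct kvm_memslots *slots;

                slots = kzalloc(sizeof(*slots) +
                                nmemslots * sizeof(struct kvm_memory_slot),
                                GFP_KERNEL);
                if (!slots)
                        return NULL;

                memcpy(slots, old, sizeof(*old) +
                       old->nmemslots * sizeof(struct kvm_memory_slot));
                slots->nmemslots = nmemslots;

                return slots;
        }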
> if (!slots)
> goto out_free;
> - memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
> - if (mem->slot >= slots->nmemslots)
> - slots->nmemslots = mem->slot + 1;
> + memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots) +
> + kvm->memslots->nmemslots * sizeof(struct kvm_memory_slot));
> + slots->nmemslots = nmemslots;
> slots->generation++;
>
> /* actual memory is freed via old in kvm_free_physmem_slot below */
> @@ -808,6 +827,9 @@ skip_lpage:
> rcu_assign_pointer(kvm->memslots, slots);
> synchronize_srcu_expedited(&kvm->srcu);
>
> + if (flush)
> + kvm_arch_flush_shadow(kvm);
> +
Need to flush before rcu_assign_pointer() so kvm_mmu_free_page() sees
the old slot count.
But even that is insufficient since we'll create direct and indirect
slot bitmaps concurrently. Need to store whether the bitmap is direct
or not in kvm_mmu_page.
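For example (a sketch; the flag name is made up, and the bit could live in
the page role instead):

        struct kvm_mmu_page {
                ...
                bool slot_bitmap_indirect;      /* set when kzalloc'ed */
                union {
                        DECLARE_BITMAP(direct_slot_bitmap, BITS_PER_LONG);
                        unsigned long *indirect_slot_bitmap;
                };
        };

        static void kvm_mmu_free_page(struct kvm *kvm, struct kvm_mmu_page *sp)
        {
                ...
                if (sp->slot_bitmap_indirect)
                        kfree(sp->indirect_slot_bitmap);
                ...
        }

That way freeing does not depend on the slot count at free time at all.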
> @@ -1832,6 +1854,8 @@ static long kvm_vm_ioctl(struct file *filp,
> sizeof kvm_userspace_mem))
> goto out;
>
> + kvm_userspace_mem.slot += KVM_PRIVATE_MEM_SLOTS;
> +
Slightly uneasy about this, but no real objection.
--
error compiling committee.c: too many arguments to function