From mboxrd@z Thu Jan 1 00:00:00 1970 From: izik eidus Subject: [PATCH] unalias rework Date: Thu, 04 Sep 2008 17:13:20 +0300 Message-ID: <48BFED00.80006@qumranet.com> Mime-Version: 1.0 Content-Type: multipart/mixed; boundary="------------020003090800080503000304" To: kvm@vger.kernel.org Return-path: Received: from il.qumranet.com ([212.179.150.194]:27387 "EHLO il.qumranet.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751864AbYIDONW (ORCPT ); Thu, 4 Sep 2008 10:13:22 -0400 Received: from [172.16.15.154] (izike-laptop.qumranet.com [172.16.15.154]) by il.qumranet.com (Postfix) with ESMTP id 58243250310 for ; Thu, 4 Sep 2008 17:13:20 +0300 (IDT) Sender: kvm-owner@vger.kernel.org List-ID: This is a multi-part message in MIME format. --------------020003090800080503000304 Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 7bit --------------020003090800080503000304 Content-Type: text/x-diff; name="0001-KVM-unalias-rework.patch" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="0001-KVM-unalias-rework.patch" >>From 476d23f08738de2396134f5ef41923ad54994972 Mon Sep 17 00:00:00 2001 From: izike Date: Thu, 4 Sep 2008 17:10:04 +0300 Subject: [PATCH] KVM: unalias rework Besides making the code less complicated, this actually fixes a bug related to aliased pages inside the shadow code that were treated as not aliased. 
Signed-off-by: Izik Eidus --- arch/ia64/include/asm/kvm_host.h | 1 + arch/ia64/kvm/kvm-ia64.c | 5 -- arch/s390/include/asm/kvm_host.h | 1 + arch/s390/kvm/kvm-s390.c | 5 -- arch/x86/kvm/mmu.c | 5 +-- arch/x86/kvm/vmx.c | 2 +- arch/x86/kvm/vmx.h | 8 +++- arch/x86/kvm/x86.c | 69 ++++++++++++++++------------- include/asm-x86/kvm_host.h | 9 ---- include/linux/kvm_host.h | 4 +- virt/kvm/kvm_main.c | 87 +++++++++++++++++++++++++++++-------- 11 files changed, 118 insertions(+), 78 deletions(-) diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h index 1efe513..f54d35e 100644 --- a/arch/ia64/include/asm/kvm_host.h +++ b/arch/ia64/include/asm/kvm_host.h @@ -35,6 +35,7 @@ #define KVM_MAX_VCPUS 4 #define KVM_MEMORY_SLOTS 32 +#define KVM_ALIAS_SLOTS 0 /* memory slots that does not exposed to userspace */ #define KVM_PRIVATE_MEM_SLOTS 4 diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 8a3b5fc..1fc2ddc 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1782,11 +1782,6 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) return 0; } -gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) -{ - return gfn; -} - int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE; diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 3c55e41..5c0ebd4 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -18,6 +18,7 @@ #define KVM_MAX_VCPUS 64 #define KVM_MEMORY_SLOTS 32 +#define KVM_ALIAS_SLOTS 0 /* memory slots that does not exposed to userspace */ #define KVM_PRIVATE_MEM_SLOTS 4 diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 8b00eb2..135a91a 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -684,11 +684,6 @@ void kvm_arch_flush_shadow(struct kvm *kvm) { } -gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) -{ - return gfn; -} - static int __init kvm_s390_init(void) { return 
kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index a87a11e..5fbc536 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -436,7 +436,6 @@ static int is_largepage_backed(struct kvm_vcpu *vcpu, gfn_t large_gfn) /* * Take gfn and return the reverse mapping to it. - * Note: gfn must be unaliased before this function get called */ static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int lpage) @@ -472,7 +471,6 @@ static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage) if (!is_rmap_pte(*spte)) return; - gfn = unalias_gfn(vcpu->kvm, gfn); sp = page_header(__pa(spte)); sp->gfns[spte - sp->spt] = gfn; rmapp = gfn_to_rmap(vcpu->kvm, gfn, lpage); @@ -607,7 +605,6 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn) u64 *spte; int write_protected = 0; - gfn = unalias_gfn(kvm, gfn); rmapp = gfn_to_rmap(kvm, gfn, 0); spte = rmap_next(kvm, rmapp, NULL); @@ -2505,7 +2502,7 @@ static void audit_write_protection(struct kvm_vcpu *vcpu) continue; slot = gfn_to_memslot(vcpu->kvm, sp->gfn); - gfn = unalias_gfn(vcpu->kvm, sp->gfn); + gfn = sp->gfn; rmapp = &slot->rmap[gfn - slot->base_gfn]; if (*rmapp) printk(KERN_ERR "%s: (%s) shadow page has writable" diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 14671f4..2a50208 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2407,7 +2407,7 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) { int ret; struct kvm_userspace_memory_region tss_mem = { - .slot = 8, + .slot = TSS_PRIVATE_MEMSLOT, .guest_phys_addr = addr, .memory_size = PAGE_SIZE * 3, .flags = 0, diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h index 0c22e5f..07b2858 100644 --- a/arch/x86/kvm/vmx.h +++ b/arch/x86/kvm/vmx.h @@ -349,8 +349,12 @@ enum vmcs_field { #define IA32_FEATURE_CONTROL_LOCKED_BIT 0x1 #define IA32_FEATURE_CONTROL_VMXON_ENABLED_BIT 0x4 -#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT 9 -#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT 
10 +#define TSS_PRIVATE_MEMSLOT KVM_MEMORY_SLOTS + \ + KVM_ALIAS_SLOTS +#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT KVM_MEMORY_SLOTS + \ + KVM_ALIAS_SLOTS + 1 +#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT KVM_MEMORY_SLOTS + \ + KVM_ALIAS_SLOTS + 2 #define VMX_NR_VPIDS (1 << 16) #define VMX_VPID_EXTENT_SINGLE_CONTEXT 1 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3f3cb71..b4a1c43 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1708,20 +1708,6 @@ static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm) return kvm->arch.n_alloc_mmu_pages; } -gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) -{ - int i; - struct kvm_mem_alias *alias; - - for (i = 0; i < kvm->arch.naliases; ++i) { - alias = &kvm->arch.aliases[i]; - if (gfn >= alias->base_gfn - && gfn < alias->base_gfn + alias->npages) - return alias->target_gfn + gfn - alias->base_gfn; - } - return gfn; -} - /* * Set a new alias region. Aliases map a portion of physical memory into * another portion. This is useful for memory windows, for example the PC @@ -1730,8 +1716,11 @@ gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, struct kvm_memory_alias *alias) { - int r, n; - struct kvm_mem_alias *p; + int r; + struct kvm_userspace_memory_region alias_mem; + struct kvm_memory_slot *slot; + unsigned long offset_addr; + gfn_t target_base_gfn; r = -EINVAL; /* General sanity checks */ @@ -1747,27 +1736,32 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, if (alias->target_phys_addr + alias->memory_size < alias->target_phys_addr) goto out; - down_write(&kvm->slots_lock); - spin_lock(&kvm->mmu_lock); - p = &kvm->arch.aliases[alias->slot]; - p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; - p->npages = alias->memory_size >> PAGE_SHIFT; - p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT; + target_base_gfn = alias->target_phys_addr >> PAGE_SHIFT; + slot = gfn_to_memslot(kvm, target_base_gfn); + if (!slot) + goto out_unlock; - for (n = 
KVM_ALIAS_SLOTS; n > 0; --n) - if (kvm->arch.aliases[n - 1].npages) - break; - kvm->arch.naliases = n; + if (slot->npages < alias->memory_size >> PAGE_SHIFT) + goto out_unlock; - spin_unlock(&kvm->mmu_lock); - kvm_mmu_zap_all(kvm); + alias_mem.slot = alias->slot + KVM_MEMORY_SLOTS; + alias_mem.guest_phys_addr = alias->guest_phys_addr; + alias_mem.memory_size = alias->memory_size; + offset_addr = target_base_gfn - slot->base_gfn; + offset_addr = offset_addr << PAGE_SHIFT; + alias_mem.userspace_addr = slot->userspace_addr + offset_addr; - up_write(&kvm->slots_lock); + alias_mem.flags = alias->flags; - return 0; + r = __kvm_set_memory_region(kvm, &alias_mem, 1); + if (!r) + kvm_mmu_zap_all(kvm); + +out_unlock: + up_write(&kvm->slots_lock); out: return r; } @@ -1854,7 +1848,10 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, { int r; int n; + int i; struct kvm_memory_slot *memslot; + struct kvm_memory_slot *alias_memslot; + unsigned long size; int is_dirty = 0; down_write(&kvm->slots_lock); @@ -1865,9 +1862,19 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, /* If nothing is dirty, don't bother messing with page tables. 
*/ if (is_dirty) { + memslot = &kvm->memslots[log->slot]; + for (i = KVM_MEMORY_SLOTS; i < KVM_MEMORY_SLOTS + + KVM_ALIAS_SLOTS; ++i) { + alias_memslot = &kvm->memslots[i]; + size = memslot->npages << PAGE_SHIFT; + if (alias_memslot->userspace_addr >= + memslot->userspace_addr && + alias_memslot->userspace_addr < + memslot->userspace_addr + size) + kvm_mmu_slot_remove_write_access(kvm, i); + } kvm_mmu_slot_remove_write_access(kvm, log->slot); kvm_flush_remote_tlbs(kvm); - memslot = &kvm->memslots[log->slot]; n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; memset(memslot->dirty_bitmap, 0, n); } diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h index 815efc3..00613c5 100644 --- a/include/asm-x86/kvm_host.h +++ b/include/asm-x86/kvm_host.h @@ -325,12 +325,6 @@ struct kvm_vcpu_arch { u64 mtrr[0x100]; }; -struct kvm_mem_alias { - gfn_t base_gfn; - unsigned long npages; - gfn_t target_gfn; -}; - struct kvm_irq_ack_notifier { struct hlist_node link; unsigned gsi; @@ -352,9 +346,6 @@ struct kvm_assigned_dev_kernel { }; struct kvm_arch{ - int naliases; - struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS]; - unsigned int n_free_mmu_pages; unsigned int n_requested_mmu_pages; unsigned int n_alloc_mmu_pages; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a18aaad..c82d9e3 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -109,7 +109,8 @@ struct kvm { struct mm_struct *mm; /* userspace tied to this vm */ int nmemslots; struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS + - KVM_PRIVATE_MEM_SLOTS]; + KVM_PRIVATE_MEM_SLOTS + + KVM_ALIAS_SLOTS]; struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; struct list_head vm_list; struct kvm_io_bus mmio_bus; @@ -175,7 +176,6 @@ int kvm_arch_set_memory_region(struct kvm *kvm, struct kvm_memory_slot old, int user_alloc); void kvm_arch_flush_shadow(struct kvm *kvm); -gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn); struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); unsigned long gfn_to_hva(struct kvm 
*kvm, gfn_t gfn); void kvm_release_page_clean(struct page *page); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index de3b029..5737a53 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -370,10 +370,58 @@ out: return kvm; } +static int is_aliased_slot(struct kvm *kvm, struct kvm_memory_slot *slot) +{ + int i; + + for (i = KVM_MEMORY_SLOTS; i < KVM_MEMORY_SLOTS + KVM_ALIAS_SLOTS; ++i) { + struct kvm_memory_slot *alias_slot = &kvm->memslots[i]; + + if (alias_slot->base_gfn == slot->base_gfn) + return 1; + } + return 0; +} + +static void update_alias_slots(struct kvm *kvm, struct kvm_memory_slot *free) +{ + int i; + + if (is_aliased_slot(kvm, free)) + return; + + for (i = KVM_MEMORY_SLOTS; i < KVM_MEMORY_SLOTS + KVM_ALIAS_SLOTS; + ++i) { + struct kvm_memory_slot *alias_memslot = &kvm->memslots[i]; + unsigned long size = free->npages << PAGE_SHIFT; + + if (alias_memslot->userspace_addr >= free->userspace_addr && + alias_memslot->userspace_addr < free->userspace_addr + + size) { + alias_memslot->flags = free->flags; + if (free->dirty_bitmap) { + unsigned long offset = + alias_memslot->userspace_addr - + free->userspace_addr; + unsigned dirty_offset; + unsigned long bitmap_addr; + + offset = offset >> PAGE_SHIFT; + dirty_offset = ALIGN(offset, BITS_PER_LONG) / 8; + bitmap_addr = (unsigned long) free->dirty_bitmap; + bitmap_addr += dirty_offset; + alias_memslot->dirty_bitmap = (unsigned long *) bitmap_addr; + } else + alias_memslot->dirty_bitmap = NULL; + } + } +} + /* * Free any memory in @free but not in @dont. 
*/ -static void kvm_free_physmem_slot(struct kvm_memory_slot *free, +static void kvm_free_physmem_slot(struct kvm *kvm, + struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { if (!dont || free->rmap != dont->rmap) @@ -385,10 +433,16 @@ static void kvm_free_physmem_slot(struct kvm_memory_slot *free, if (!dont || free->lpage_info != dont->lpage_info) vfree(free->lpage_info); - free->npages = 0; free->dirty_bitmap = NULL; free->rmap = NULL; free->lpage_info = NULL; + +#ifdef CONFIG_X86 + update_alias_slots(kvm, free); + if (dont) + update_alias_slots(kvm, dont); +#endif + free->npages = 0; } void kvm_free_physmem(struct kvm *kvm) @@ -396,7 +450,7 @@ void kvm_free_physmem(struct kvm *kvm) int i; for (i = 0; i < kvm->nmemslots; ++i) - kvm_free_physmem_slot(&kvm->memslots[i], NULL); + kvm_free_physmem_slot(kvm, &kvm->memslots[i], NULL); } static void kvm_destroy_vm(struct kvm *kvm) @@ -466,7 +520,8 @@ int __kvm_set_memory_region(struct kvm *kvm, goto out; if (mem->guest_phys_addr & (PAGE_SIZE - 1)) goto out; - if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS) + if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS + + KVM_ALIAS_SLOTS) goto out; if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr) goto out; @@ -528,6 +583,7 @@ int __kvm_set_memory_region(struct kvm *kvm, else new.userspace_addr = 0; } + if (npages && !new.lpage_info) { int largepages = npages / KVM_PAGES_PER_HPAGE; if (npages % KVM_PAGES_PER_HPAGE) @@ -577,11 +633,11 @@ int __kvm_set_memory_region(struct kvm *kvm, goto out_free; } - kvm_free_physmem_slot(&old, &new); + kvm_free_physmem_slot(kvm, &old, &new); return 0; out_free: - kvm_free_physmem_slot(&new, &old); + kvm_free_physmem_slot(kvm, &new, &old); out: return r; @@ -668,7 +724,7 @@ int kvm_is_error_hva(unsigned long addr) } EXPORT_SYMBOL_GPL(kvm_is_error_hva); -static struct kvm_memory_slot *__gfn_to_memslot(struct kvm *kvm, gfn_t gfn) +struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) { int i; 
@@ -681,19 +737,13 @@ static struct kvm_memory_slot *__gfn_to_memslot(struct kvm *kvm, gfn_t gfn) } return NULL; } - -struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) -{ - gfn = unalias_gfn(kvm, gfn); - return __gfn_to_memslot(kvm, gfn); -} +EXPORT_SYMBOL_GPL(gfn_to_memslot); int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) { int i; - gfn = unalias_gfn(kvm, gfn); - for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { + for (i = 0; i < KVM_MEMORY_SLOTS + KVM_ALIAS_SLOTS; ++i) { struct kvm_memory_slot *memslot = &kvm->memslots[i]; if (gfn >= memslot->base_gfn @@ -708,8 +758,7 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) { struct kvm_memory_slot *slot; - gfn = unalias_gfn(kvm, gfn); - slot = __gfn_to_memslot(kvm, gfn); + slot = gfn_to_memslot(kvm, gfn); if (!slot) return bad_hva(); return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE); @@ -959,8 +1008,8 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn) { struct kvm_memory_slot *memslot; - gfn = unalias_gfn(kvm, gfn); - memslot = __gfn_to_memslot(kvm, gfn); + memslot = gfn_to_memslot(kvm, gfn); + if (memslot && memslot->dirty_bitmap) { unsigned long rel_gfn = gfn - memslot->base_gfn; -- 1.5.4.3 --------------020003090800080503000304--