diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 4ab487c..e7df8fc 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -409,6 +409,7 @@ struct kvm_memory_slot { unsigned long *rmap; unsigned long *dirty_bitmap; int user_alloc; /* user allocated memory */ + unsigned long userspace_addr; }; @@ -561,8 +562,9 @@ static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; } hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva); struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva); -extern hpa_t bad_page_address; +extern struct page *bad_page; +int is_error_page(struct page *page); gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn); struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 0b2894a..dde8497 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -325,13 +325,13 @@ static void kvm_free_userspace_physmem(struct kvm_memory_slot *free) { int i; - for (i = 0; i < free->npages; ++i) { + /*for (i = 0; i < free->npages; ++i) { if (free->phys_mem[i]) { if (!PageReserved(free->phys_mem[i])) SetPageDirty(free->phys_mem[i]); page_cache_release(free->phys_mem[i]); } - } + }*/ } @@ -773,19 +773,8 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, memset(new.phys_mem, 0, npages * sizeof(struct page *)); memset(new.rmap, 0, npages * sizeof(*new.rmap)); if (user_alloc) { - unsigned long pages_num; - new.user_alloc = 1; - down_read(¤t->mm->mmap_sem); - - pages_num = get_user_pages(current, current->mm, - mem->userspace_addr, - npages, 1, 0, new.phys_mem, - NULL); - - up_read(¤t->mm->mmap_sem); - if (pages_num != npages) - goto out_unlock; + new.userspace_addr = kvm_user_memory_region; } else { for (i = 0; i < npages; ++i) { new.phys_mem[i] = alloc_page(GFP_HIGHUSER @@ -1014,6 +1003,12 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) return r; } +int is_error_page(struct page *page) +{ + return page == bad_page; +} +EXPORT_SYMBOL_GPL(is_error_page); + gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) { int i; @@ -1054,8 +1049,27 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) gfn = unalias_gfn(kvm, gfn); slot = __gfn_to_memslot(kvm, gfn); - if (!slot) - return NULL; + if (!slot) { + get_page(bad_page); + return bad_page; + } + if (slot->user_alloc) { + struct page *page[1]; + int npages; + + down_read(¤t->mm->mmap_sem); + npages = get_user_pages(current, current->mm, + slot->userspace_addr + + (gfn - slot->base_gfn) * PAGE_SIZE, 1, + 1, 0, page, NULL); + up_read(¤t->mm->mmap_sem); + if (npages != 1) { + get_page(bad_page); + return bad_page; + } + return page[0]; + } + get_page(slot->phys_mem[gfn - slot->base_gfn]); return slot->phys_mem[gfn - slot->base_gfn]; } EXPORT_SYMBOL_GPL(gfn_to_page); @@ -1075,13 +1089,16 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, struct page *page; page = gfn_to_page(kvm, gfn); - if (!page) + if (is_error_page(page)) { + put_page(page); return -EFAULT; + } page_virt = kmap_atomic(page, KM_USER0); memcpy(data, page_virt + offset, len); kunmap_atomic(page_virt, KM_USER0); + put_page(page); return 0; } EXPORT_SYMBOL_GPL(kvm_read_guest_page); @@ -1113,14 +1130,17 @@ int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data, struct page *page; page = gfn_to_page(kvm, gfn); - if (!page) + if (is_error_page(page)) { + put_page(page); return -EFAULT; + } page_virt = kmap_atomic(page, KM_USER0); memcpy(page_virt + offset, data, len); kunmap_atomic(page_virt, KM_USER0); mark_page_dirty(kvm, gfn); + put_page(page); return 0; } EXPORT_SYMBOL_GPL(kvm_write_guest_page); @@ -1151,13 +1171,16 @@ int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len) struct page *page; page = gfn_to_page(kvm, gfn); - if (!page) + if (is_error_page(page)) { + put_page(page); return -EFAULT; + } page_virt = kmap_atomic(page, KM_USER0); memset(page_virt + offset, 0, len); kunmap_atomic(page_virt, KM_USER0); + put_page(page); return 0; } EXPORT_SYMBOL_GPL(kvm_clear_guest_page); @@ -3300,9 +3322,10 @@ static struct page *kvm_vm_nopage(struct vm_area_struct *vma, pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; page = gfn_to_page(kvm, pgoff); - if (!page) + if (is_error_page(page)) { + put_page(page); return NOPAGE_SIGBUS; - get_page(page); + } if (type != NULL) *type = VM_FAULT_MINOR; @@ -3642,7 +3665,7 @@ static struct sys_device kvm_sysdev = { .cls = &kvm_sysdev_class, }; -hpa_t bad_page_address; +struct page *bad_page; static __init int kvm_init(void) { - static struct page *bad_page; int r; r = kvm_mmu_module_init(); @@ -3782,16 +3802,11 @@ static __init int kvm_init(void) kvm_init_msr_list(); - bad_page = alloc_page(GFP_KERNEL); - - if (bad_page == NULL) { + if ((bad_page = alloc_page(GFP_KERNEL | __GFP_ZERO)) == NULL) { r = -ENOMEM; goto out; } - bad_page_address = page_to_pfn(bad_page) << PAGE_SHIFT; - memset(__va(bad_page_address), 0, PAGE_SIZE); - return 0; out: @@ -3804,9 +3819,12 @@ out4: static __exit void kvm_exit(void) { kvm_exit_debug(); - __free_page(pfn_to_page(bad_page_address >> PAGE_SHIFT)); + __free_page(bad_page); kvm_mmu_module_exit(); } module_init(kvm_init) module_exit(kvm_exit) diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index 382bd6a..7669af1 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -421,6 +421,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte) return; page = page_header(__pa(spte)); rmapp = gfn_to_rmap(kvm, page->gfns[spte - page->spt]); + put_page(pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT)); if (!*rmapp) { printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte); BUG(); @@ -823,23 +824,17 @@ static void page_header_update_slot(struct kvm *kvm, void *pte, gpa_t gpa) __set_bit(slot, &page_head->slot_bitmap); } -hpa_t safe_gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa) -{ - hpa_t hpa = gpa_to_hpa(vcpu, gpa); - - return is_error_hpa(hpa) ? bad_page_address | (gpa & ~PAGE_MASK): hpa; -} - hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa) { struct page *page; + hpa_t hpa; ASSERT((gpa & HPA_ERR_MASK) == 0); page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); - if (!page) - return gpa | HPA_ERR_MASK; - return ((hpa_t)page_to_pfn(page) << PAGE_SHIFT) - | (gpa & (PAGE_SIZE-1)); + hpa = ((hpa_t)page_to_pfn(page) << PAGE_SHIFT) | (gpa & (PAGE_SIZE-1)); + if (is_error_page(page)) + return hpa | HPA_ERR_MASK; + return hpa; } hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva) @@ -886,6 +881,9 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p) table[index] = p | PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK; rmap_add(vcpu, &table[index], v >> PAGE_SHIFT); + if(!is_rmap_pte(table[index])) + put_page(pfn_to_page((v & PT64_BASE_ADDR_MASK) + >> PAGE_SHIFT)); return 0; } @@ -900,6 +898,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p) 1, 0, &table[index]); if (!new_table) { pgprintk("nonpaging_map: ENOMEM\n"); + put_page(pfn_to_page((v & PT64_BASE_ADDR_MASK) + >> PAGE_SHIFT)); return -ENOMEM; } @@ -1014,8 +1014,11 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, paddr = gpa_to_hpa(vcpu , addr & PT64_BASE_ADDR_MASK); - if (is_error_hpa(paddr)) + if (is_error_hpa(paddr)) { + put_page(pfn_to_page((paddr & PT64_BASE_ADDR_MASK) + >> PAGE_SHIFT)); return 1; + } return nonpaging_map(vcpu, addr & PAGE_MASK, paddr); } @@ -1489,8 +1492,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte, printk(KERN_ERR "xx audit error: (%s) levels %d" " gva %lx gpa %llx hpa %llx ent %llx %d\n", audit_msg, vcpu->mmu.root_level, - va, gpa, hpa, ent, - is_shadow_present_pte(ent)); + va, gpa, hpa, ent, is_shadow_present_pte(ent)); else if (ent == shadow_notrap_nonpresent_pte && !is_error_hpa(hpa)) printk(KERN_ERR "audit: (%s) notrap shadow," diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h index 447d2c3..da02742 100644 --- a/drivers/kvm/paging_tmpl.h +++ b/drivers/kvm/paging_tmpl.h @@ -76,8 +76,6 @@ static int FNAME(walk_addr)(struct guest_walker *walker, struct kvm_vcpu *vcpu, gva_t addr, int write_fault, int user_fault, int fetch_fault) { - hpa_t hpa; - struct kvm_memory_slot *slot; pt_element_t *ptep; pt_element_t root; gfn_t table_gfn; @@ -102,9 +100,8 @@ static int FNAME(walk_addr)(struct guest_walker *walker, walker->table_gfn[walker->level - 1] = table_gfn; pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__, walker->level - 1, table_gfn); - slot = gfn_to_memslot(vcpu->kvm, table_gfn); - hpa = safe_gpa_to_hpa(vcpu, root & PT64_BASE_ADDR_MASK); - walker->page = pfn_to_page(hpa >> PAGE_SHIFT); + walker->page = gfn_to_page(vcpu->kvm, (root & PT64_BASE_ADDR_MASK) + >> PAGE_SHIFT); walker->table = kmap_atomic(walker->page, KM_USER0); ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) || @@ -114,7 +111,6 @@ static int FNAME(walk_addr)(struct guest_walker *walker, for (;;) { int index = PT_INDEX(addr, walker->level); - hpa_t paddr; ptep = &walker->table[index]; walker->index = index; @@ -159,17 +155,19 @@ static int FNAME(walk_addr)(struct guest_walker *walker, walker->inherited_ar &= walker->table[index]; table_gfn = (*ptep & PT_BASE_ADDR_MASK) >> PAGE_SHIFT; kunmap_atomic(walker->table, KM_USER0); - paddr = safe_gpa_to_hpa(vcpu, table_gfn << PAGE_SHIFT); - walker->page = pfn_to_page(paddr >> PAGE_SHIFT); + put_page(walker->page); + walker->page = gfn_to_page(vcpu->kvm, table_gfn); walker->table = kmap_atomic(walker->page, KM_USER0); --walker->level; - walker->table_gfn[walker->level - 1] = table_gfn; + walker->table_gfn[walker->level - 1 ] = table_gfn; pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__, walker->level - 1, table_gfn); } walker->pte = *ptep; - if (walker->page) + if (walker->page) { walker->ptep = NULL; + put_page(walker->page); + } if (walker->table) kunmap_atomic(walker->table, KM_USER0); pgprintk("%s: pte %llx\n", __FUNCTION__, (u64)*ptep); @@ -191,6 +189,8 @@ err: walker->error_code |= PFERR_FETCH_MASK; if (walker->table) kunmap_atomic(walker->table, KM_USER0); + if (walker->page) + put_page(walker->page); return 0; } @@ -222,18 +222,23 @@ static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu, write_fault, user_fault, gfn); if (write_fault && !dirty) { + struct page *page = NULL; + pt_element_t *guest_ent, *tmp = NULL; if (walker->ptep) guest_ent = walker->ptep; else { - tmp = kmap_atomic(walker->page, KM_USER0); + page = gfn_to_page(vcpu->kvm, walker->table_gfn[walker->level - 1]); + tmp = kmap_atomic(page, KM_USER0); guest_ent = &tmp[walker->index]; } *guest_ent |= PT_DIRTY_MASK; - if (!walker->ptep) + if (!walker->ptep) { kunmap_atomic(tmp, KM_USER0); + put_page(page); + } dirty = 1; FNAME(mark_pagetable_dirty)(vcpu->kvm, walker); } @@ -257,6 +262,8 @@ static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu, if (is_error_hpa(paddr)) { set_shadow_pte(shadow_pte, shadow_trap_nonpresent_pte | PT_SHADOW_IO_MARK); + put_page(pfn_to_page((paddr & PT64_BASE_ADDR_MASK) + >> PAGE_SHIFT)); return; } @@ -294,9 +301,16 @@ unshadowed: pgprintk("%s: setting spte %llx\n", __FUNCTION__, spte); set_shadow_pte(shadow_pte, spte); page_header_update_slot(vcpu->kvm, shadow_pte, gaddr); - if (!was_rmapped) + if (!was_rmapped) { rmap_add(vcpu, shadow_pte, (gaddr & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT); + if (!is_rmap_pte(*shadow_pte)) + put_page(pfn_to_page((paddr & PT64_BASE_ADDR_MASK) + >> PAGE_SHIFT)); + } + else + put_page(pfn_to_page((paddr & PT64_BASE_ADDR_MASK) + >> PAGE_SHIFT)); if (!ptwrite || !*ptwrite) vcpu->last_pte_updated = shadow_pte; } @@ -518,19 +532,22 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu, { int i; pt_element_t *gpt; + struct page *page; if (sp->role.metaphysical || PTTYPE == 32) { nonpaging_prefetch_page(vcpu, sp); return; } - gpt = kmap_atomic(gfn_to_page(vcpu->kvm, sp->gfn), KM_USER0); + page = gfn_to_page(vcpu->kvm, sp->gfn); + gpt = kmap_atomic(page, KM_USER0); for (i = 0; i < PT64_ENT_PER_PAGE; ++i) if (is_present_pte(gpt[i])) sp->spt[i] = shadow_trap_nonpresent_pte; else sp->spt[i] = shadow_notrap_nonpresent_pte; kunmap_atomic(gpt, KM_USER0); + put_page(page); }